improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
[libav.git] / libavcodec / snow.c
CommitLineData
791e7b83
MN
1/*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#include "avcodec.h"
20#include "common.h"
21#include "dsputil.h"
22#include "cabac.h"
23
24#include "mpegvideo.h"
25
26#undef NDEBUG
27#include <assert.h>
28
29#define MAX_DECOMPOSITIONS 8
30#define MAX_PLANES 4
31#define DWTELEM int
32#define QROOT 8
33
34static const int8_t quant3[256]={
35 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
51};
52static const int8_t quant3b[256]={
53 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69};
70static const int8_t quant5[256]={
71 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
72 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
73 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
74 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
75 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
76 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
77 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
78 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
79-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
80-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
81-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
82-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
83-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
84-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
85-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
86-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
87};
88static const int8_t quant7[256]={
89 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
92 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
93 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
94 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
95 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
96 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
97-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
98-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
99-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
100-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
101-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
102-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
103-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
104-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
105};
106static const int8_t quant9[256]={
107 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
108 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
109 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
110 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
111 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
112 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
113 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
114 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
115-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
116-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
117-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
118-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
119-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
120-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
121-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
122-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
123};
124static const int8_t quant11[256]={
125 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
128 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
129 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
130 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
131 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
132 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
133-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
134-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
135-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
136-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
137-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
138-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
139-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
140-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
141};
142static const int8_t quant13[256]={
143 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
144 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
147 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
148 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
149 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
150 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
151-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
152-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
153-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
154-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
155-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
156-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
157-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
159};
160
161#define OBMC_MAX 64
162#if 0 //64*cubic
163static const uint8_t obmc32[1024]={
164 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
165 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
166 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
167 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
168 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
169 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
170 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
171 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
172 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
173 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
174 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
175 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
176 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
177 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
178 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
179 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
180 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
181 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
182 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
183 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
184 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
185 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
186 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
187 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
188 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
189 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
190 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
191 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
192 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
193 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
194 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
195 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
196//error:0.000022
197};
198static const uint8_t obmc16[256]={
199 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
200 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
201 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
202 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
203 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
204 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
205 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
206 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
207 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
208 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
209 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
210 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
211 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
212 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
213 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
214 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
215//error:0.000033
216};
217#elif 1 // 64*linear
218static const uint8_t obmc32[1024]={
219 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
220 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
221 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
222 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
223 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
224 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
225 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
226 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
227 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
228 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
229 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
230 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
231 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
232 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
233 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
234 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
235 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
236 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
237 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
238 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
239 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
240 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
241 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
242 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
243 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
244 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
245 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
246 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
247 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
248 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
249 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
250 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
251 //error:0.000020
252};
253static const uint8_t obmc16[256]={
254 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
255 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
256 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
257 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
258 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
259 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
260 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
261 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
262 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
263 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
264 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
265 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
266 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
267 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
268 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
269 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
270//error:0.000015
271};
272#else //64*cos
273static const uint8_t obmc32[1024]={
274 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
275 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
276 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
277 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
278 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
279 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
280 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
281 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
282 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
283 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
284 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
285 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
286 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
287 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
288 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
289 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
290 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
291 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
292 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
293 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
294 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
295 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
296 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
297 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
298 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
299 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
300 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
301 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
302 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
303 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
304 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
305 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
306//error:0.000022
307};
308static const uint8_t obmc16[256]={
309 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
310 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
311 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
312 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
313 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
314 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
315 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
316 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
317 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
318 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
319 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
320 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
321 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
322 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
323 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
324 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
325//error:0.000022
326};
327#endif
328
791e7b83
MN
/**
 * Description of one subband of a wavelet-decomposed plane.
 *
 * NOTE(review): the code that fills and consumes these fields lies outside
 * the part of the file reviewed here; the per-field notes below are limited
 * to what the declarations themselves show.
 */
329typedef struct SubBand{
330 int level;
331 int stride;
332 int width;
333 int height;
334 int qlog; ///< log(qscale)/log[2^(1/6)]
// Coefficient storage of the band (DWTELEM is defined as int in this file).
335 DWTELEM *buf;
// Link to another SubBand; presumably the co-located band of the next
// coarser level -- TODO confirm against the code that sets it.
336 struct SubBand *parent;
// 7 + 512 context sets of 32 adaptive state bytes each. put_symbol() and
// get_symbol() index one such set with offsets 0..31.
337 uint8_t state[/*7*2*/ 7 + 512][32];
338}SubBand;
339
/**
 * One image plane together with the subbands of its wavelet decomposition.
 */
340typedef struct Plane{
341 int width;
342 int height;
// Indexed as band[decomposition level][orientation]: up to
// MAX_DECOMPOSITIONS levels with 4 band slots per level.
343 SubBand band[MAX_DECOMPOSITIONS][4];
344}Plane;
345
/**
 * Codec-wide state of the snow encoder/decoder.
 *
 * NOTE(review): most members are only declared in the part of the file
 * reviewed here; their exact use must be checked against the functions that
 * read and write them.
 */
346typedef struct SnowContext{
347// MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
348
349 AVCodecContext *avctx;
// Arithmetic coder state, passed to put_cabac()/get_cabac() by the symbol
// coding helpers in this file.
350 CABACContext c;
351 DSPContext dsp;
352 AVFrame input_picture;
353 AVFrame current_picture;
354 AVFrame last_picture;
355 AVFrame mconly_picture;
356// uint8_t q_context[16];
// One 32-byte context set, the size put_symbol()/get_symbol() operate on.
357 uint8_t header_state[32];
358 int keyframe;
359 int version;
360 int spatial_decomposition_type;
361 int temporal_decomposition_type;
362 int spatial_decomposition_count;
363 int temporal_decomposition_count;
364 DWTELEM *spatial_dwt_buffer;
365 DWTELEM *pred_buffer;
366 int colorspace_type;
367 int chroma_h_shift;
368 int chroma_v_shift;
369 int spatial_scalability;
370 int qlog;
371 int mv_scale;
372 int qbias;
// Presumably the number of fractional bits in qbias -- TODO confirm.
373#define QBIAS_SHIFT 3
374 int b_width; //FIXME remove?
375 int b_height; //FIXME remove?
376 Plane plane[MAX_PLANES];
377 SubBand mb_band;
378 SubBand mv_band[2];
379
380 uint16_t *mb_type;
381 uint8_t *mb_mean;
382 uint32_t *dummy;
383 int16_t (*motion_val8)[2];
384 int16_t (*motion_val16)[2];
385 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
386}SnowContext;
387
388#define QEXPSHIFT 7 //FIXME try to change this to 0
/**
 * Fixed-point table of fractional powers of two:
 * qexp[i] == round(2^(i/8) * 128), i.e. 2^(i/QROOT) scaled by 1<<QEXPSHIFT
 * with QROOT 8 and QEXPSHIFT 7.
 * The commented-out rows are the same table at the smaller scales 64, 32,
 * 16 and 8 (QEXPSHIFT 6..3), kept for experiments with a smaller shift.
 */
389static const uint8_t qexp[8]={
390 128, 140, 152, 166, 181, 197, 215, 235
391// 64, 70, 76, 83, 91, 99, 108, 117
392// 32, 35, 38, 41, 45, 49, 54, 59
393// 16, 17, 19, 21, 23, 25, 27, 29
394// 8, 9, 10, 10, 11, 12, 13, 15
395};
396
/**
 * Maps an arbitrary index into the valid range [0, m] by mirroring it at
 * both ends of the range (..., 2, 1, 0, 1, 2, ..., m-1, m, m-1, ...).
 *
 * For -m <= v <= 2*m this is the classic single reflection
 * (v<0 -> -v, v>m -> 2*m-v). Indices further away are reflected as often as
 * necessary, so the result is always a valid index, which a single
 * reflection does not guarantee.
 *
 * @param v index to map, may lie outside [0, m]
 * @param m largest valid index; if m <= 0 the only valid index, 0, is returned
 * @return the mirrored index, 0 <= result <= max(m, 0)
 */
static inline int mirror(int v, int m){
    int period;

    if(m <= 0)
        return 0;                 // degenerate range: only index 0 exists
    if(v >= 0 && v <= m)
        return v;                 // common case, nothing to reflect

    period= 2*m;                  // the mirrored sequence repeats every 2*m samples
    v %= period;                  // C '%' truncates towards zero ...
    if(v < 0)
        v += period;              // ... so bring negative remainders into [0, period)
    return v > m ? period - v : v;
}
402
403static inline void put_symbol(CABACContext *c, uint8_t *state, int v, int is_signed){
404 int i;
405
406 if(v){
407 const int a= ABS(v);
408 const int e= av_log2(a);
409#if 1
410 const int el= FFMIN(e, 10);
411 put_cabac(c, state+0, 0);
412
413 for(i=0; i<el; i++){
414 put_cabac(c, state+1+i, 1); //1..10
415 }
416 for(; i<e; i++){
417 put_cabac(c, state+1+9, 1); //1..10
418 }
419 put_cabac(c, state+1+FFMIN(i,9), 0);
420
421 for(i=e-1; i>=el; i--){
422 put_cabac(c, state+22+9, (a>>i)&1); //22..31
423 }
424 for(; i>=0; i--){
425 put_cabac(c, state+22+i, (a>>i)&1); //22..31
426 }
427
428 if(is_signed)
429 put_cabac(c, state+11 + el, v < 0); //11..21
430#else
431
432 put_cabac(c, state+0, 0);
433 if(e<=9){
434 for(i=0; i<e; i++){
435 put_cabac(c, state+1+i, 1); //1..10
436 }
437 put_cabac(c, state+1+i, 0);
438
439 for(i=e-1; i>=0; i--){
440 put_cabac(c, state+22+i, (a>>i)&1); //22..31
441 }
442
443 if(is_signed)
444 put_cabac(c, state+11 + e, v < 0); //11..21
445 }else{
446 for(i=0; i<e; i++){
447 put_cabac(c, state+1+FFMIN(i,9), 1); //1..10
448 }
449 put_cabac(c, state+1+FFMIN(i,9), 0);
450
451 for(i=e-1; i>=0; i--){
452 put_cabac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
453 }
454
455 if(is_signed)
456 put_cabac(c, state+11 + FFMIN(e,10), v < 0); //11..21
457 }
458#endif
459 }else{
460 put_cabac(c, state+0, 1);
461 }
462}
463
464static inline int get_symbol(CABACContext *c, uint8_t *state, int is_signed){
465 if(get_cabac(c, state+0))
466 return 0;
467 else{
468 int i, e, a, el;
469 //FIXME try to merge loops with FFMIN() maybe they are equally fast and they are surly cuter
470 for(e=0; e<10; e++){
471 if(get_cabac(c, state + 1 + e)==0) // 1..10
472 break;
473 }
474 el= e;
475
476 if(e==10){
477 while(get_cabac(c, state + 1 + 9)) //10
478 e++;
479 }
480 a= 1;
481 for(i=e-1; i>=el; i--){
482 a += a + get_cabac(c, state+22+9); //31
483 }
484 for(; i>=0; i--){
485 a += a + get_cabac(c, state+22+i); //22..31
486 }
487
488 if(is_signed && get_cabac(c, state+11 + el)) //11..21
489 return -a;
490 else
491 return a;
492 }
493}
494
4f4e9633
MN
495static inline void put_symbol2(CABACContext *c, uint8_t *state, int v, int log2){
496 int i;
0635cbfc 497 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
498
499 assert(v>=0);
0635cbfc
MN
500 assert(log2>=-4);
501
502 while(v >= r){
503 put_cabac(c, state+4+log2, 1);
504 v -= r;
4f4e9633 505 log2++;
0635cbfc 506 if(log2>0) r+=r;
4f4e9633 507 }
0635cbfc 508 put_cabac(c, state+4+log2, 0);
4f4e9633
MN
509
510 for(i=log2-1; i>=0; i--){
511 put_cabac(c, state+31-i, (v>>i)&1);
512 }
4f4e9633
MN
513}
514
515static inline int get_symbol2(CABACContext *c, uint8_t *state, int log2){
516 int i;
0635cbfc 517 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
518 int v=0;
519
0635cbfc
MN
520 assert(log2>=-4);
521
522 while(get_cabac(c, state+4+log2)){
523 v+= r;
4f4e9633 524 log2++;
0635cbfc 525 if(log2>0) r+=r;
4f4e9633
MN
526 }
527
528 for(i=log2-1; i>=0; i--){
529 v+= get_cabac(c, state+31-i)<<i;
530 }
531
532 return v;
533}
534
791e7b83
MN
535static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
536 const int mirror_left= !highpass;
537 const int mirror_right= (width&1) ^ highpass;
538 const int w= (width>>1) - 1 + (highpass & width);
539 int i;
540
541#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
542 if(mirror_left){
543 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
544 dst += dst_step;
545 src += src_step;
546 }
547
548 for(i=0; i<w; i++){
549 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
550 }
551
552 if(mirror_right){
553 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
554 }
555}
556
557static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
558 const int mirror_left= !highpass;
559 const int mirror_right= (width&1) ^ highpass;
560 const int w= (width>>1) - 1 + (highpass & width);
561 int i;
562
563 if(mirror_left){
564 int r= 3*2*ref[0];
565 r += r>>4;
566 r += r>>8;
567 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
568 dst += dst_step;
569 src += src_step;
570 }
571
572 for(i=0; i<w; i++){
573 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
574 r += r>>4;
575 r += r>>8;
576 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
577 }
578
579 if(mirror_right){
580 int r= 3*2*ref[w*ref_step];
581 r += r>>4;
582 r += r>>8;
583 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
584 }
585}
586
587
/**
 * Generic in-place lifting step on one line of interleaved samples.
 *
 * Every second sample, starting at index start, is updated with a rounded,
 * scaled weighted sum of its n nearest neighbours of the other parity:
 *     dst[x] += (sum_i coeffs[i]*dst[x + 2*i - n + 1] + (1<<shift)/2) >> shift
 * (-= when inverse is nonzero). Neighbours outside [0, width) are taken from
 * the signal mirrored at sample 0 and at sample width-1. The reflection is
 * repeated as often as needed, so filters that reach further than the
 * signal is long never read outside the buffer.
 *
 * @param dst     samples, updated in place
 * @param width   number of samples
 * @param coeffs  n filter taps; may be NULL when n is 0
 * @param n       number of taps
 * @param shift   right shift applied to the weighted sum
 * @param start   index of the first sample to update (its parity selects the phase)
 * @param inverse nonzero to undo the step
 */
static void inplace_lift(int *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
    const int period= 2*(width-1); // the mirrored signal repeats every 2*(width-1) samples
    int x, i;

    for(x=start; x<width; x+=2){
        int64_t sum=0;

        for(i=0; i<n; i++){
            int x2= x + 2*i - n + 1;

            if(x2<0 || x2>=width){
                if(period<=0){
                    x2= 0;                        // single sample: everything mirrors onto it
                }else{
                    x2 %= period;                 // '%' truncates towards zero
                    if(x2<0)      x2 += period;
                    if(x2>=width) x2= period-x2;
                }
            }
            sum += coeffs[i]*(int64_t)dst[x2];
        }
        if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
        else        dst[x] += (sum + (1<<shift)/2)>>shift;
    }
}
604
/**
 * Generic in-place lifting step along the columns of a 2D buffer.
 *
 * Every second row, starting at row start, is updated with a rounded, scaled
 * weighted sum of the n nearest rows of the other parity:
 *     dst[x + y*stride] += (sum_i coeffs[i]*dst[x + (y + 2*i - n + 1)*stride]
 *                           + (1<<shift)/2) >> shift
 * (-= when inverse is nonzero). Rows outside [0, height) are taken from the
 * image mirrored at row 0 and at row height-1. The reflection is repeated as
 * often as needed, so filters that reach further than the image is high
 * never read outside the buffer.
 *
 * @param dst     samples, updated in place
 * @param width   number of columns to process
 * @param height  number of rows
 * @param stride  distance in elements between vertically adjacent samples
 * @param coeffs  n filter taps; may be NULL when n is 0
 * @param n       number of taps
 * @param shift   right shift applied to the weighted sum
 * @param start   first row to update (its parity selects the phase)
 * @param inverse nonzero to undo the step
 */
static void inplace_liftV(int *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
    const int period= 2*(height-1); // the mirrored rows repeat every 2*(height-1) rows
    int x, y, i;

    for(y=start; y<height; y+=2){
        for(x=0; x<width; x++){
            int64_t sum=0;

            for(i=0; i<n; i++){
                int y2= y + 2*i - n + 1;

                if(y2<0 || y2>=height){
                    if(period<=0){
                        y2= 0;                         // single row: everything mirrors onto it
                    }else{
                        y2 %= period;                  // '%' truncates towards zero
                        if(y2<0)       y2 += period;
                        if(y2>=height) y2= period-y2;
                    }
                }
                sum += coeffs[i]*(int64_t)dst[x + y2*stride];
            }
            if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
            else        dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
        }
    }
}
622
623#define SCALEX 1
624#define LX0 0
625#define LX1 1
626
de890c9b 627#if 0 // more accurate 9/7
791e7b83
MN
628#define N1 2
629#define SHIFT1 14
630#define COEFFS1 (int[]){-25987,-25987}
631#define N2 2
632#define SHIFT2 19
633#define COEFFS2 (int[]){-27777,-27777}
634#define N3 2
635#define SHIFT3 15
636#define COEFFS3 (int[]){28931,28931}
637#define N4 2
638#define SHIFT4 15
639#define COEFFS4 (int[]){14533,14533}
640#elif 1 // 13/7 CRF
641#define N1 4
642#define SHIFT1 4
643#define COEFFS1 (int[]){1,-9,-9,1}
de890c9b 644#define N2 4
791e7b83
MN
645#define SHIFT2 4
646#define COEFFS2 (int[]){-1,5,5,-1}
647#define N3 0
648#define SHIFT3 1
649#define COEFFS3 NULL
650#define N4 0
651#define SHIFT4 1
652#define COEFFS4 NULL
653#elif 1 // 3/5
654#define LX0 1
655#define LX1 0
656#define SCALEX 0.5
657#define N1 2
658#define SHIFT1 1
659#define COEFFS1 (int[]){1,1}
660#define N2 2
661#define SHIFT2 2
662#define COEFFS2 (int[]){-1,-1}
663#define N3 0
664#define SHIFT3 0
665#define COEFFS3 NULL
666#define N4 0
667#define SHIFT4 0
668#define COEFFS4 NULL
669#elif 1 // 11/5
670#define N1 0
671#define SHIFT1 1
672#define COEFFS1 NULL
673#define N2 2
674#define SHIFT2 2
675#define COEFFS2 (int[]){-1,-1}
676#define N3 2
677#define SHIFT3 0
678#define COEFFS3 (int[]){-1,-1}
679#define N4 4
680#define SHIFT4 7
681#define COEFFS4 (int[]){-5,29,29,-5}
682#define SCALEX 4
683#elif 1 // 9/7 CDF
684#define N1 2
685#define SHIFT1 7
686#define COEFFS1 (int[]){-203,-203}
687#define N2 2
688#define SHIFT2 12
689#define COEFFS2 (int[]){-217,-217}
690#define N3 2
691#define SHIFT3 7
692#define COEFFS3 (int[]){113,113}
693#define N4 2
694#define SHIFT4 9
695#define COEFFS4 (int[]){227,227}
696#define SCALEX 1
697#elif 1 // 7/5 CDF
698#define N1 0
699#define SHIFT1 1
700#define COEFFS1 NULL
701#define N2 2
702#define SHIFT2 2
703#define COEFFS2 (int[]){-1,-1}
704#define N3 2
705#define SHIFT3 0
706#define COEFFS3 (int[]){-1,-1}
707#define N4 2
708#define SHIFT4 4
709#define COEFFS4 (int[]){3,3}
710#elif 1 // 9/7 MN
711#define N1 4
712#define SHIFT1 4
713#define COEFFS1 (int[]){1,-9,-9,1}
714#define N2 2
715#define SHIFT2 2
716#define COEFFS2 (int[]){1,1}
717#define N3 0
718#define SHIFT3 1
719#define COEFFS3 NULL
720#define N4 0
721#define SHIFT4 1
722#define COEFFS4 NULL
723#else // 13/7 CRF
724#define N1 4
725#define SHIFT1 4
726#define COEFFS1 (int[]){1,-9,-9,1}
727#define N2 4
728#define SHIFT2 4
729#define COEFFS2 (int[]){-1,5,5,-1}
730#define N3 0
731#define SHIFT3 1
732#define COEFFS3 NULL
733#define N4 0
734#define SHIFT4 1
735#define COEFFS4 NULL
736#endif
737static void horizontal_decomposeX(int *b, int width){
738 int temp[width];
739 const int width2= width>>1;
740 const int w2= (width+1)>>1;
741 int A1,A2,A3,A4, x;
742
743 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
744 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
745 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
746 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
747
748 for(x=0; x<width2; x++){
749 temp[x ]= b[2*x ];
750 temp[x+w2]= b[2*x + 1];
751 }
752 if(width&1)
753 temp[x ]= b[2*x ];
754 memcpy(b, temp, width*sizeof(int));
755}
756
757static void horizontal_composeX(int *b, int width){
758 int temp[width];
759 const int width2= width>>1;
760 int A1,A2,A3,A4, x;
761 const int w2= (width+1)>>1;
762
763 memcpy(temp, b, width*sizeof(int));
764 for(x=0; x<width2; x++){
765 b[2*x ]= temp[x ];
766 b[2*x + 1]= temp[x+w2];
767 }
768 if(width&1)
769 b[2*x ]= temp[x ];
770
771 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
772 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
773 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
774 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
775}
776
777static void spatial_decomposeX(int *buffer, int width, int height, int stride){
778 int x, y;
779
780 for(y=0; y<height; y++){
781 for(x=0; x<width; x++){
782 buffer[y*stride + x] *= SCALEX;
783 }
784 }
785
786 for(y=0; y<height; y++){
787 horizontal_decomposeX(buffer + y*stride, width);
788 }
789
790 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
791 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
792 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
793 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
794}
795
796static void spatial_composeX(int *buffer, int width, int height, int stride){
797 int x, y;
798
799 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
800 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
801 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
802 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
803
804 for(y=0; y<height; y++){
805 horizontal_composeX(buffer + y*stride, width);
806 }
807
808 for(y=0; y<height; y++){
809 for(x=0; x<width; x++){
810 buffer[y*stride + x] /= SCALEX;
811 }
812 }
813}
814
/**
 * Forward horizontal 5/3 integer wavelet step on one row.
 *
 * The row is first de-interleaved into temp (even-indexed samples in
 * temp[0 .. w2-1], odd-indexed ones from temp[w2] on, w2 = (width+1)/2).
 * Two lift() calls then write the result back into b: the first fills the
 * upper half b+w2 from temp+w2 and temp, the second fills the lower half b
 * from temp and the freshly written b+w2.
 * The constants (-1, 0, 1) and (1, 2, 2) match the multiplier/offset/shift
 * used by vertical_decompose53iH0()/L0(); the exact meaning of the remaining
 * lift() arguments is defined where lift() is declared.
 *
 * @param b     row of width samples, transformed in place
 * @param width number of samples in the row
 */
static void horizontal_decompose53i(int *b, int width){
    int temp[width];
    const int width2= width>>1;
    const int w2= (width+1)>>1;
    int x;

    for(x=0; x<width2; x++){
        temp[x   ]= b[2*x    ];
        temp[x+w2]= b[2*x + 1];
    }
    /* odd width: one extra even-indexed sample remains (x == width2 here) */
    if(width&1)
        temp[x   ]= b[2*x    ];

    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
}
860
/**
 * Vertical 5/3 analysis, high-pass step: b1[i] -= (b0[i] + b2[i]) >> 1.
 *
 * @param b0    row above b1 (read only)
 * @param b1    row updated in place
 * @param b2    row below b1 (read only)
 * @param width number of samples per row
 */
static void vertical_decompose53iH0(int *b0, int *b1, int *b2, int width){
    int col;

    for(col=0; col<width; col++){
        const int neighbours= b0[col] + b2[col];

        b1[col] -= neighbours>>1;
    }
}
868
/**
 * Vertical 5/3 analysis, low-pass step: b1[i] += (b0[i] + b2[i] + 2) >> 2.
 *
 * @param b0    row above b1 (read only)
 * @param b1    row updated in place
 * @param b2    row below b1 (read only)
 * @param width number of samples per row
 */
static void vertical_decompose53iL0(int *b0, int *b1, int *b2, int width){
    int col;

    for(col=0; col<width; col++){
        const int rounded= b0[col] + b2[col] + 2;

        b1[col] += rounded>>2;
    }
}
876
877static void spatial_decompose53i(int *buffer, int width, int height, int stride){
878 int x, y;
879 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
880 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
881
882 for(y=-2; y<height; y+=2){
883 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
884 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
885
886{START_TIMER
887 if(b1 <= b3) horizontal_decompose53i(b2, width);
888 if(y+2 < height) horizontal_decompose53i(b3, width);
889STOP_TIMER("horizontal_decompose53i")}
890
891{START_TIMER
892 if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
893 if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
894STOP_TIMER("vertical_decompose53i*")}
895
896 b0=b2;
897 b1=b3;
898 }
899}
900
/*
 * Integer constants for the four 9/7 lifting steps A, B, C and D.
 * The vertical_*97i* functions apply a step as
 *     (W_xM*(b0[i] + b2[i]) + W_xO) >> W_xS
 * so W_xM is the multiplier, W_xO the offset added before the shift and
 * W_xS the right-shift amount. Exactly one of the four parameter sets below
 * is selected by the #if chain.
 */
901#define lift5 lift
902#if 1
903#define W_AM 3
904#define W_AO 0
905#define W_AS 1
906
907#define W_BM 1
908#define W_BO 8
909#define W_BS 4
910
/*
 * With lift5 undefined, the "#ifdef lift5" sections in the vertical H1
 * functions take their shift-and-add branch, which does not use W_CM.
 * NOTE(review): horizontal_*97i still call lift5(..., W_CM, ...), so a
 * separate lift5 must exist elsewhere in the file - confirm how it treats
 * the 9999 value.
 */
911#undef lift5
912#define W_CM 9999
913#define W_CO 2
914#define W_CS 2
915
916#define W_DM 15
917#define W_DO 16
918#define W_DS 5
/* alternative parameter set (disabled) */
919#elif 0
920#define W_AM 55
921#define W_AO 16
922#define W_AS 5
923
924#define W_BM 3
925#define W_BO 32
926#define W_BS 6
927
928#define W_CM 127
929#define W_CO 64
930#define W_CS 7
931
932#define W_DM 7
933#define W_DO 8
934#define W_DS 4
/* alternative parameter set (disabled) */
935#elif 0
936#define W_AM 97
937#define W_AO 32
938#define W_AS 6
939
940#define W_BM 63
941#define W_BO 512
942#define W_BS 10
943
944#define W_CM 13
945#define W_CO 8
946#define W_CS 4
947
948#define W_DM 15
949#define W_DO 16
950#define W_DS 5
951
/* fallback parameter set, used only if every branch above is disabled */
952#else
953
954#define W_AM 203
955#define W_AO 64
956#define W_AS 7
957
958#define W_BM 217
959#define W_BO 2048
960#define W_BS 12
961
962#define W_CM 113
963#define W_CO 64
964#define W_CS 7
965
966#define W_DM 227
967#define W_DO 128
968#define W_DS 9
969#endif
970static void horizontal_decompose97i(int *b, int width){
971 int temp[width];
972 const int w2= (width+1)>>1;
973
974 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
975 lift (temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
976 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
977 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
978}
979
980
981static void vertical_decompose97iH0(int *b0, int *b1, int *b2, int width){
982 int i;
983
984 for(i=0; i<width; i++){
985 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
986 }
987}
988
989static void vertical_decompose97iH1(int *b0, int *b1, int *b2, int width){
990 int i;
991
992 for(i=0; i<width; i++){
993#ifdef lift5
994 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
995#else
996 int r= 3*(b0[i] + b2[i]);
997 r+= r>>4;
998 r+= r>>8;
999 b1[i] += (r+W_CO)>>W_CS;
1000#endif
1001 }
1002}
1003
1004static void vertical_decompose97iL0(int *b0, int *b1, int *b2, int width){
1005 int i;
1006
1007 for(i=0; i<width; i++){
1008 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1009 }
1010}
1011
1012static void vertical_decompose97iL1(int *b0, int *b1, int *b2, int width){
1013 int i;
1014
1015 for(i=0; i<width; i++){
1016 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1017 }
1018}
1019
1020static void spatial_decompose97i(int *buffer, int width, int height, int stride){
1021 int x, y;
1022 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1023 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1024 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1025 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1026
1027 for(y=-4; y<height; y+=2){
1028 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1029 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1030
1031{START_TIMER
1032 if(b3 <= b5) horizontal_decompose97i(b4, width);
1033 if(y+4 < height) horizontal_decompose97i(b5, width);
1034if(width>400){
1035STOP_TIMER("horizontal_decompose97i")
1036}}
1037
1038{START_TIMER
1039 if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1040 if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1041 if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1042 if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1043
1044if(width>400){
1045STOP_TIMER("vertical_decompose97i")
1046}}
1047
1048 b0=b2;
1049 b1=b3;
1050 b2=b4;
1051 b3=b5;
1052 }
1053}
1054
1055static void spatial_dwt(SnowContext *s, int *buffer, int width, int height, int stride){
1056 int level;
1057
1058 for(level=0; level<s->spatial_decomposition_count; level++){
1059 switch(s->spatial_decomposition_type){
1060 case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1061 case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1062 case 2: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
1063 }
1064 }
1065}
1066
/**
 * Inverse horizontal 5/3 integer wavelet step on one row.
 *
 * Two lift() calls (last argument 1) rebuild both halves into temp from the
 * halves stored in b (second half starts at w2 = (width+1)/2); the result is
 * then interleaved back into b, first half to even positions and second
 * half to odd positions.
 *
 * @param b     row of width samples, transformed in place
 * @param width number of samples in the row
 */
static void horizontal_compose53i(int *b, int width){
    int temp[width];
    const int width2= width>>1;
    const int w2= (width+1)>>1;
    int x;

    lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
    lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);

    for(x=0; x<width2; x++){
        b[2*x    ]= temp[x   ];
        b[2*x + 1]= temp[x+w2];
    }
    /* odd width: one extra even-indexed sample remains (x == width2 here) */
    if(width&1)
        b[2*x    ]= temp[x   ];
}
1112
/**
 * Vertical 5/3 synthesis, undoing the high-pass step:
 * b1[i] += (b0[i] + b2[i]) >> 1.
 *
 * @param b0    row above b1 (read only)
 * @param b1    row updated in place
 * @param b2    row below b1 (read only)
 * @param width number of samples per row
 */
static void vertical_compose53iH0(int *b0, int *b1, int *b2, int width){
    int col;

    for(col=0; col<width; col++){
        const int neighbours= b0[col] + b2[col];

        b1[col] += neighbours>>1;
    }
}
1120
/**
 * Vertical 5/3 synthesis, undoing the low-pass step:
 * b1[i] -= (b0[i] + b2[i] + 2) >> 2.
 *
 * @param b0    row above b1 (read only)
 * @param b1    row updated in place
 * @param b2    row below b1 (read only)
 * @param width number of samples per row
 */
static void vertical_compose53iL0(int *b0, int *b1, int *b2, int width){
    int col;

    for(col=0; col<width; col++){
        const int rounded= b0[col] + b2[col] + 2;

        b1[col] -= rounded>>2;
    }
}
1128
1129static void spatial_compose53i(int *buffer, int width, int height, int stride){
1130 int x, y;
1131 DWTELEM *b0= buffer + mirror(-1-1, height-1)*stride;
1132 DWTELEM *b1= buffer + mirror(-1 , height-1)*stride;
1133
1134 for(y=-1; y<=height; y+=2){
1135 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1136 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1137
1138{START_TIMER
1139 if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1140 if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1141STOP_TIMER("vertical_compose53i*")}
1142
1143{START_TIMER
1144 if(y-1 >= 0) horizontal_compose53i(b0, width);
1145 if(b0 <= b2) horizontal_compose53i(b1, width);
1146STOP_TIMER("horizontal_compose53i")}
1147
1148 b0=b2;
1149 b1=b3;
1150 }
1151}
1152
1153
1154static void horizontal_compose97i(int *b, int width){
1155 int temp[width];
1156 const int w2= (width+1)>>1;
1157
1158 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1159 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1160 lift (b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1161 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1162}
1163
1164static void vertical_compose97iH0(int *b0, int *b1, int *b2, int width){
1165 int i;
1166
1167 for(i=0; i<width; i++){
1168 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1169 }
1170}
1171
1172static void vertical_compose97iH1(int *b0, int *b1, int *b2, int width){
1173 int i;
1174
1175 for(i=0; i<width; i++){
1176#ifdef lift5
1177 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1178#else
1179 int r= 3*(b0[i] + b2[i]);
1180 r+= r>>4;
1181 r+= r>>8;
1182 b1[i] -= (r+W_CO)>>W_CS;
1183#endif
1184 }
1185}
1186
1187static void vertical_compose97iL0(int *b0, int *b1, int *b2, int width){
1188 int i;
1189
1190 for(i=0; i<width; i++){
1191 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1192 }
1193}
1194
1195static void vertical_compose97iL1(int *b0, int *b1, int *b2, int width){
1196 int i;
1197
1198 for(i=0; i<width; i++){
1199 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1200 }
1201}
1202
1203static void spatial_compose97i(int *buffer, int width, int height, int stride){
1204 int x, y;
1205 DWTELEM *b0= buffer + mirror(-3-1, height-1)*stride;
1206 DWTELEM *b1= buffer + mirror(-3 , height-1)*stride;
1207 DWTELEM *b2= buffer + mirror(-3+1, height-1)*stride;
1208 DWTELEM *b3= buffer + mirror(-3+2, height-1)*stride;
1209
1210 for(y=-3; y<=height; y+=2){
1211 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1212 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1213
1214 if(stride == width && y+4 < height && 0){
1215 int x;
1216 for(x=0; x<width/2; x++)
1217 b5[x] += 64*2;
1218 for(; x<width; x++)
1219 b5[x] += 169*2;
1220 }
1221
1222{START_TIMER
1223 if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1224 if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1225 if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1226 if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1227if(width>400){
1228STOP_TIMER("vertical_compose97i")}}
1229
1230{START_TIMER
1231 if(y-1>= 0) horizontal_compose97i(b0, width);
1232 if(b0 <= b2) horizontal_compose97i(b1, width);
1233if(width>400 && b0 <= b2){
1234STOP_TIMER("horizontal_compose97i")}}
1235
1236 b0=b2;
1237 b1=b3;
1238 b2=b4;
1239 b3=b5;
1240 }
1241}
1242
1243static void spatial_idwt(SnowContext *s, int *buffer, int width, int height, int stride){
1244 int level;
1245
1246 for(level=s->spatial_decomposition_count-1; level>=0; level--){
1247 switch(s->spatial_decomposition_type){
1248 case 0: spatial_compose97i(buffer, width>>level, height>>level, stride<<level); break;
1249 case 1: spatial_compose53i(buffer, width>>level, height>>level, stride<<level); break;
1250 case 2: spatial_composeX (buffer, width>>level, height>>level, stride<<level); break;
1251 }
1252 }
1253}
1254
/*
 * Scan order over a 4x4 grid: 16 coordinate pairs that visit every cell
 * exactly once, each pair differing from the previous one by a single step
 * in one coordinate.
 * NOTE(review): which column of the pair is x and which is y is not visible
 * from the table itself - confirm at the point of use.
 */
1255static const int hilbert[16][2]={
1256 {0,0}, {1,0}, {1,1}, {0,1},
1257 {0,2}, {0,3}, {1,3}, {1,2},
1258 {2,2}, {2,3}, {3,3}, {3,2},
1259 {3,1}, {2,1}, {2,0}, {3,0},
1260};
1261#if 0
1262-o o-
1263 | |
1264 o-o
1265
1266-o-o o-o-
1267 | |
1268 o-o o-o
1269 | |
1270 o o-o o
1271 | | | |
1272 o-o o-o
1273
1274 0112122312232334122323342334
1275 0123456789ABCDEF0123456789AB
1276 RLLRMRRLLRRMRLLMLRRLMLLRRLLM
1277
1278 4 B F 14 1B
1279 4 11 15 20 27
1280
1281-o o-o-o o-o-o o-
1282 | | | | | |
1283 o-o o-o o-o o-o
1284 | |
1285 o-o o-o o-o o-o
1286 | | | | | |
1287 o o-o-o o-o-o o
1288 | |
1289 o-o o-o-o-o o-o
1290 | | | |
1291 o-o o-o o-o o-o
1292 | | | |
1293 o o-o o o o-o o
1294 | | | | | | | |
1295 o-o o-o o-o o-o
1296
1297#endif
1298
/*
 * Append the coordinate pair (x, y) to the list a at index i, then increment i.
 * a is an array of 2-element rows; i must be a modifiable lvalue and is
 * evaluated more than once. The expansion is a brace block, so existing call
 * sites without a trailing semicolon keep working.
 * All parameters are parenthesised so that arguments such as *count or a
 * conditional expression bind as intended.
 */
#define SVI(a, i, x, y) \
{\
    (a)[(i)][0]= (x);\
    (a)[(i)][1]= (y);\
    (i)++;\
}
1305
/**
 * qsort()-style comparator for int16_t[2] pairs.
 *
 * Orders primarily by element [1] and, when those are equal, by element [0].
 *
 * @return negative, zero or positive as a sorts before, equal to or after b
 */
static int sig_cmp(const void *a, const void *b){
    const int16_t *lhs= a;
    const int16_t *rhs= b;
    const int major= lhs[1] - rhs[1];

    return major ? major : lhs[0] - rhs[0];
}
1313
4f4e9633
MN
/**
 * De-interleave: gather the even-numbered bits (0, 2, 4, ...) of a into a
 * contiguous value, bit 2k of the input becoming bit k of the result.
 *
 * The odd bits are masked away; each following step multiplies the lower
 * part of every group so that it lands directly below the upper part,
 * doubling the run of packed bits (2, 4, 8, then 16 bits). The packed value
 * ends up starting at bit 15.
 */
static int deint(unsigned int a){
    static const unsigned int step[4][2]={
        {0x11111111,   1},
        {0x0F0F0F0F,   3},
        {0x00FF00FF,  15},
        {0x0000FFFF, 255},
    };
    int k;

    a &= 0x55555555;
    for(k=0; k<4; k++)
        a += step[k][1]*(a & step[k][0]);

    return a>>15;
}
1322
1323static void encode_subband_z0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1324 const int level= b->level;
1325 const int w= b->width;
1326 const int h= b->height;
1327 int x, y, pos;
1328
1329 if(1){
1330 int run=0;
1331 int runs[w*h];
1332 int run_index=0;
1333 int count=0;
1334
1335 for(pos=0; ; pos++){
1336 int x= deint(pos );
1337 int y= deint(pos>>1);
1338 int v, p=0, pr=0, pd=0;
1339 int /*ll=0, */l=0, lt=0, t=0/*, rt=0*/;
1340
1341 if(x>=w || y>=h){
1342 if(x>=w && y>=h)
1343 break;
1344 continue;
1345 }
1346 count++;
1347
1348 v= src[x + y*stride];
1349
1350 if(y){
1351 t= src[x + (y-1)*stride];
1352 if(x){
1353 lt= src[x - 1 + (y-1)*stride];
1354 }
1355 if(x + 1 < w){
1356 /*rt= src[x + 1 + (y-1)*stride]*/;
1357 }
1358 }
1359 if(x){
1360 l= src[x - 1 + y*stride];
1361 /*if(x > 1){
1362 if(orientation==1) ll= src[y + (x-2)*stride];
1363 else ll= src[x - 2 + y*stride];
1364 }*/
1365 }
1366 if(parent){
1367 int px= x>>1;
1368 int py= y>>1;
1369 if(px<b->parent->width && py<b->parent->height){
1370 p= parent[px + py*2*stride];
1371 /*if(px+1<b->parent->width)
1372 pr= parent[px + 1 + py*2*stride];
1373 if(py+1<b->parent->height)
1374 pd= parent[px + (py+1)*2*stride];*/
1375 }
1376 }
1377 if(!(/*ll|*/l|lt|t|/*rt|*/p)){
1378 if(v){
1379 runs[run_index++]= run;
1380 run=0;
1381 }else{
1382 run++;
1383 }
1384 }
1385 }
1386 assert(count==w*h);
1387 runs[run_index++]= run;
1388 run_index=0;
1389 run= runs[run_index++];
1390
1391 put_symbol(&s->c, b->state[1], run, 0);
1392
1393 for(pos=0; ; pos++){
1394 int x= deint(pos );
1395 int y= deint(pos>>1);
1396 int v, p=0, pr=0, pd=0;
1397 int /*ll=0, */l=0, lt=0, t=0/*, rt=0*/;
1398
1399 if(x>=w || y>=h){
1400 if(x>=w && y>=h)
1401 break;
1402 continue;
1403 }
1404 v= src[x + y*stride];
1405
1406 if(y){
1407 t= src[x + (y-1)*stride];
1408 if(x){
1409 lt= src[x - 1 + (y-1)*stride];
1410 }
1411 if(x + 1 < w){
1412// rt= src[x + 1 + (y-1)*stride];
1413 }
1414 }
1415 if(x){
1416 l= src[x - 1 + y*stride];
1417 /*if(x > 1){
1418 if(orientation==1) ll= src[y + (x-2)*stride];
1419 else ll= src[x - 2 + y*stride];
1420 }*/
1421 }
1422
1423 if(parent){
1424 int px= x>>1;
1425 int py= y>>1;
1426 if(px<b->parent->width && py<b->parent->height){
1427 p= parent[px + py*2*stride];
1428/* if(px+1<b->parent->width)
1429 pr= parent[px + 1 + py*2*stride];
1430 if(py+1<b->parent->height)
1431 pd= parent[px + (py+1)*2*stride];*/
1432 }
1433 }
1434 if(/*ll|*/l|lt|t|/*rt|*/p){
1435 int context= av_log2(/*ABS(ll) + */2*(3*ABS(l) + ABS(lt) + 2*ABS(t) + /*ABS(rt) +*/ ABS(p)));
1436
1437 put_cabac(&s->c, &b->state[0][context], !!v);
1438 }else{
1439 if(!run){
1440 run= runs[run_index++];
1441 put_symbol(&s->c, b->state[1], run, 0);
1442 assert(v);
1443 }else{
1444 run--;
1445 assert(!v);
1446 }
1447 }
1448 if(v){
1449 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + /*ABS(rt) +*/ ABS(p));
1450
1451 put_symbol(&s->c, b->state[context + 2], ABS(v)-1, 0);
1452 put_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
1453 }
1454 }
1455 }
1456}
1457
1458static void encode_subband_bp(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1459 const int level= b->level;
1460 const int w= b->width;
1461 const int h= b->height;
1462 int x, y;
1463
1464#if 0
1465 int plane;
1466 for(plane=24; plane>=0; plane--){
1467 int run=0;
1468 int runs[w*h];
1469 int run_index=0;
1470
1471 for(y=0; y<h; y++){
1472 for(x=0; x<w; x++){
1473 int v, lv, p=0;
1474 int d=0, r=0, rd=0, ld=0;
1475 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1476 v= src[x + y*stride];
1477
1478 if(y){
1479 t= src[x + (y-1)*stride];
1480 if(x){
1481 lt= src[x - 1 + (y-1)*stride];
1482 }
1483 if(x + 1 < w){
1484 rt= src[x + 1 + (y-1)*stride];
1485 }
1486 }
1487 if(x){
1488 l= src[x - 1 + y*stride];
1489 /*if(x > 1){
1490 if(orientation==1) ll= src[y + (x-2)*stride];
1491 else ll= src[x - 2 + y*stride];
1492 }*/
1493 }
1494 if(y+1<h){
1495 d= src[x + (y+1)*stride];
1496 if(x) ld= src[x - 1 + (y+1)*stride];
1497 if(x + 1 < w) rd= src[x + 1 + (y+1)*stride];
1498 }
1499 if(x + 1 < w)
1500 r= src[x + 1 + y*stride];
1501 if(parent){
1502 int px= x>>1;
1503 int py= y>>1;
1504 if(px<b->parent->width && py<b->parent->height)
1505 p= parent[px + py*2*stride];
1506 }
1507#define HIDE(c, plane) c= c>=0 ? c&((-1)<<(plane)) : -((-c)&((-1)<<(plane)));
1508 lv=v;
1509 HIDE( v, plane)
1510 HIDE(lv, plane+1)
1511 HIDE( p, plane)
1512 HIDE( l, plane)
1513 HIDE(lt, plane)
1514 HIDE( t, plane)
1515 HIDE(rt, plane)
1516 HIDE( r, plane+1)
1517 HIDE(rd, plane+1)
1518 HIDE( d, plane+1)
1519 HIDE(ld, plane+1)
1520 if(!(/*ll|*/l|lt|t|rt|r|rd|ld|d|p|lv)){
1521 if(v){
1522 runs[run_index++]= run;
1523 run=0;
1524 }else{
1525 run++;
1526 }
1527 }
1528 }
1529 }
1530 runs[run_index++]= run;
1531 run_index=0;
1532 run= runs[run_index++];
1533
1534 put_symbol(&s->c, b->state[1], run, 0);
1535
1536 for(y=0; y<h; y++){
1537 for(x=0; x<w; x++){
1538 int v, p=0, lv;
1539 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1540 int d=0, r=0, rd=0, ld=0;
1541 v= src[x + y*stride];
1542
1543 if(y){
1544 t= src[x + (y-1)*stride];
1545 if(x){
1546 lt= src[x - 1 + (y-1)*stride];
1547 }
1548 if(x + 1 < w){
1549 rt= src[x + 1 + (y-1)*stride];
1550 }
1551 }
1552 if(x){
1553 l= src[x - 1 + y*stride];
1554 /*if(x > 1){
1555 if(orientation==1) ll= src[y + (x-2)*stride];
1556 else ll= src[x - 2 + y*stride];
1557 }*/
1558 }
1559 if(y+1<h){
1560 d= src[x + (y+1)*stride];
1561 if(x) ld= src[x - 1 + (y+1)*stride];
1562 if(x + 1 < w) rd= src[x + 1 + (y+1)*stride];
1563 }
1564 if(x + 1 < w)
1565 r= src[x + 1 + y*stride];
1566
1567 if(parent){
1568 int px= x>>1;
1569 int py= y>>1;
1570 if(px<b->parent->width && py<b->parent->height)
1571 p= parent[px + py*2*stride];
1572 }
1573 lv=v;
1574 HIDE( v, plane)
1575 HIDE(lv, plane+1)
1576 HIDE( p, plane)
1577 HIDE( l, plane)
1578 HIDE(lt, plane)
1579 HIDE( t, plane)
1580 HIDE(rt, plane)
1581 HIDE( r, plane+1)
1582 HIDE(rd, plane+1)
1583 HIDE( d, plane+1)
1584 HIDE(ld, plane+1)
1585 if(/*ll|*/l|lt|t|rt|r|rd|ld|d|p|lv){
1586 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p)
1587 +3*ABS(r) + ABS(rd) + 2*ABS(d) + ABS(ld));
1588
1589 if(lv) put_cabac(&s->c, &b->state[99][context + 8*(av_log2(ABS(lv))-plane)], !!(v-lv));
1590 else put_cabac(&s->c, &b->state[ 0][context], !!v);
1591 }else{
1592 assert(!lv);
1593 if(!run){
1594 run= runs[run_index++];
1595 put_symbol(&s->c, b->state[1], run, 0);
1596 assert(v);
1597 }else{
1598 run--;
1599 assert(!v);
1600 }
1601 }
1602 if(v && !lv){
1603 int context= clip(quant3b[l&0xFF] + quant3b[r&0xFF], -1,1)
1604 + 3*clip(quant3b[t&0xFF] + quant3b[d&0xFF], -1,1);
1605 put_cabac(&s->c, &b->state[0][16 + 1 + 3 + context], v<0);
1606 }
1607 }
1608 }
1609 }
1610 return;
1611#endif
1612}
1613
1614static void encode_subband_X(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1615 const int level= b->level;
1616 const int w= b->width;
1617 const int h= b->height;
1618 int x, y;
1619
a8d73e56 1620#if 0
791e7b83
MN
1621 if(orientation==3 && parent && 0){
1622 int16_t candidate[w*h][2];
1623 uint8_t state[w*h];
1624 int16_t boarder[3][w*h*4][2];
1625 int16_t significant[w*h][2];
1626 int candidate_count=0;
1627 int boarder_count[3]={0,0,0};
1628 int significant_count=0;
1629 int rle_pos=0;
1630 int v, last_v;
1631 int primary= orientation==1;
1632
1633 memset(candidate, 0, sizeof(candidate));
1634 memset(state, 0, sizeof(state));
1635 memset(boarder, 0, sizeof(boarder));
1636
1637 for(y=0; y<h; y++){
1638 for(x=0; x<w; x++){
1639 if(parent[(x>>1) + (y>>1)*2*stride])
1640 SVI(candidate, candidate_count, x, y)
1641 }
1642 }
1643
1644 for(;;){
1645 while(candidate_count && !boarder_count[0] && !boarder_count[1] && !boarder_count[2]){
1646 candidate_count--;
1647 x= candidate[ candidate_count][0];
1648 y= candidate[ candidate_count][1];
1649 if(state[x + y*w])
1650 continue;
1651 state[x + y*w]= 1;
1652 v= !!src[x + y*stride];
1653 put_cabac(&s->c, &b->state[0][0], v);
1654 if(v){
1655 SVI(significant, significant_count, x,y)
1656 if(x && !state[x - 1 + y *w]) SVI(boarder[0],boarder_count[0],x-1,y )
1657 if(y && !state[x + (y-1)*w]) SVI(boarder[1],boarder_count[1],x ,y-1)
1658 if(x+1<w && !state[x + 1 + y *w]) SVI(boarder[0],boarder_count[0],x+1,y )
1659 if(y+1<h && !state[x + (y+1)*w]) SVI(boarder[1],boarder_count[1],x ,y+1)
1660 if(x && y && !state[x - 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x-1,y-1)
1661 if(x && y+1<h && !state[x - 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x-1,y+1)
1662 if(x+1<w && y+1<h && !state[x + 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x+1,y+1)
1663 if(x+1<w && y && !state[x + 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x+1,y-1)
1664 }
1665 }
1666 while(!boarder_count[0] && !boarder_count[1] && !boarder_count[2] && rle_pos < w*h){
1667 int run=0;
1668 for(; rle_pos < w*h;){
1669 x= rle_pos % w; //FIXME speed
1670 y= rle_pos / w;
1671 rle_pos++;
1672 if(state[x + y*w])
1673 continue;
1674 state[x + y*w]= 1;
1675 v= !!src[x + y*stride];
1676 if(v){
1677 put_symbol(&s->c, b->state[1], run, 0);
1678 SVI(significant, significant_count, x,y)
1679 if(x && !state[x - 1 + y *w]) SVI(boarder[0],boarder_count[0],x-1,y )
1680 if(y && !state[x + (y-1)*w]) SVI(boarder[1],boarder_count[1],x ,y-1)
1681 if(x+1<w && !state[x + 1 + y *w]) SVI(boarder[0],boarder_count[0],x+1,y )
1682 if(y+1<h && !state[x + (y+1)*w]) SVI(boarder[1],boarder_count[1],x ,y+1)
1683 if(x && y && !state[x - 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x-1,y-1)
1684 if(x && y+1<h && !state[x - 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x-1,y+1)
1685 if(x+1<w && y+1<h && !state[x + 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x+1,y+1)
1686 if(x+1<w && y && !state[x + 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x+1,y-1)
1687 break;
1688//FIXME note only right & down can be boarders
1689 }
1690 run++;
1691 }
1692 }
1693 if(!boarder_count[0] && !boarder_count[1] && !boarder_count[2])
1694 break;
1695
1696 while(boarder_count[0] || boarder_count[1] || boarder_count[2]){
1697 int index;
1698
1699 if (boarder_count[ primary]) index= primary;
1700 else if(boarder_count[1-primary]) index=1-primary;
1701 else index=2;
1702
1703 boarder_count[index]--;
1704 x= boarder[index][ boarder_count[index] ][0];
1705 y= boarder[index][ boarder_count[index] ][1];
1706 if(state[x + y*w]) //FIXME maybe check earlier
1707 continue;
1708 state[x + y*w]= 1;
1709 v= !!src[x + y*stride];
1710 put_cabac(&s->c, &b->state[0][index+1], v);
1711 if(v){
1712 SVI(significant, significant_count, x,y)
1713 if(x && !state[x - 1 + y *w]) SVI(boarder[0],boarder_count[0],x-1,y )
1714 if(y && !state[x + (y-1)*w]) SVI(boarder[1],boarder_count[1],x ,y-1)
1715 if(x+1<w && !state[x + 1 + y *w]) SVI(boarder[0],boarder_count[0],x+1,y )
1716 if(y+1<h && !state[x + (y+1)*w]) SVI(boarder[1],boarder_count[1],x ,y+1)
1717 if(x && y && !state[x - 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x-1,y-1)
1718 if(x && y+1<h && !state[x - 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x-1,y+1)
1719 if(x+1<w && y+1<h && !state[x + 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x+1,y+1)
1720 if(x+1<w && y && !state[x + 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x+1,y-1)
1721 }
1722 }
1723 }
1724 //FIXME sort significant coeffs maybe
1725 if(1){
1726 qsort(significant, significant_count, sizeof(int16_t[2]), sig_cmp);
1727 }
1728
1729 last_v=1;
1730 while(significant_count){
1731 int context= 3 + quant7[last_v&0xFF]; //use significance of suroundings
1732 significant_count--;
1733 x= significant[significant_count][0];//FIXME try opposit direction
1734 y= significant[significant_count][1];
1735 v= src[x + y*stride];
1736 put_symbol(&s->c, b->state[context + 2], v, 1); //FIXME try to avoid first bit, try this with the old code too!!
1737 last_v= v;
1738 }
1739 }
1740#endif
4f4e9633
MN
1741}
1742
1743static void encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1744 const int level= b->level;
1745 const int w= b->width;
1746 const int h= b->height;
1747 int x, y;
1748
791e7b83 1749 if(1){
791e7b83 1750 int run=0;
a8d73e56 1751 int runs[w*h];
791e7b83
MN
1752 int run_index=0;
1753
791e7b83
MN
1754 for(y=0; y<h; y++){
1755 for(x=0; x<w; x++){
78486403 1756 int v, p=0;
6b2f6646 1757 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1758 v= src[x + y*stride];
791e7b83
MN
1759
1760 if(y){
a8d73e56 1761 t= src[x + (y-1)*stride];
791e7b83 1762 if(x){
a8d73e56 1763 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1764 }
1765 if(x + 1 < w){
a8d73e56 1766 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1767 }
1768 }
1769 if(x){
a8d73e56 1770 l= src[x - 1 + y*stride];
6b2f6646
MN
1771 /*if(x > 1){
1772 if(orientation==1) ll= src[y + (x-2)*stride];
1773 else ll= src[x - 2 + y*stride];
791e7b83
MN
1774 }*/
1775 }
78486403 1776 if(parent){
a8d73e56
MN
1777 int px= x>>1;
1778 int py= y>>1;
78486403
MN
1779 if(px<b->parent->width && py<b->parent->height)
1780 p= parent[px + py*2*stride];
1781 }
1782 if(!(/*ll|*/l|lt|t|rt|p)){
791e7b83
MN
1783 if(v){
1784 runs[run_index++]= run;
1785 run=0;
1786 }else{
1787 run++;
1788 }
1789 }
1790 }
1791 }
1792 runs[run_index++]= run;
1793 run_index=0;
1794 run= runs[run_index++];
1795
4f4e9633 1796 put_symbol2(&s->c, b->state[1], run, 3);
791e7b83
MN
1797
1798 for(y=0; y<h; y++){
1799 for(x=0; x<w; x++){
78486403 1800 int v, p=0;
6b2f6646 1801 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1802 v= src[x + y*stride];
791e7b83
MN
1803
1804 if(y){
a8d73e56 1805 t= src[x + (y-1)*stride];
791e7b83 1806 if(x){
a8d73e56 1807 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1808 }
1809 if(x + 1 < w){
a8d73e56 1810 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1811 }
1812 }
1813 if(x){
a8d73e56 1814 l= src[x - 1 + y*stride];
6b2f6646
MN
1815 /*if(x > 1){
1816 if(orientation==1) ll= src[y + (x-2)*stride];
1817 else ll= src[x - 2 + y*stride];
791e7b83
MN
1818 }*/
1819 }
78486403 1820 if(parent){
a8d73e56
MN
1821 int px= x>>1;
1822 int py= y>>1;
78486403
MN
1823 if(px<b->parent->width && py<b->parent->height)
1824 p= parent[px + py*2*stride];
1825 }
1826 if(/*ll|*/l|lt|t|rt|p){
1827 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
6b2f6646
MN
1828
1829 put_cabac(&s->c, &b->state[0][context], !!v);
791e7b83
MN
1830 }else{
1831 if(!run){
1832 run= runs[run_index++];
4f4e9633
MN
1833
1834 put_symbol2(&s->c, b->state[1], run, 3);
791e7b83
MN
1835 assert(v);
1836 }else{
1837 run--;
1838 assert(!v);
1839 }
1840 }
1841 if(v){
78486403 1842 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
6b2f6646 1843
0635cbfc 1844 put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
791e7b83
MN
1845 put_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
1846 }
1847 }
1848 }
791e7b83 1849 }
791e7b83
MN
1850}
1851
4f4e9633
MN
/**
 * Subband coder with distance-classified zero runs, raster scan order.
 *
 * Like the plain zero-run coders, coefficients whose l, lt, t, rt and p
 * context is entirely zero are coded through run lengths, but the runs are
 * split into classes: run_class = av_log2(cur_dist+62), where cur_dist is a
 * horizontal-plus-vertical distance to a previously seen nonzero coefficient.
 * pos[]/dist[] hold, for the current row, the x position and row number of
 * such coefficients; last_pos[]/last_dist[] hold the list built for the
 * previous row (the two rows of positions[]/distances[] swap roles on every
 * row).
 * Pass 1 gathers the run lengths per class, pass 2 writes the symbols.
 *
 * s->c receives the coded symbols; src/parent are the coefficient planes
 * (parent may be NULL, its rows use 2*stride); orientation is not used.
 */
1852static void encode_subband_dzr(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1853 const int level= b->level;
1854 const int w= b->width;
1855 const int h= b->height;
1856 int x, y;
1857
1858 if(1){
1859 int run[16]={0};
1860 int runs[16][w*h]; //FIXME do something about the size
1861 int run_index[16]={0};
1862 int positions[2][w];
1863 int distances[2][w];
1864 int dist_count=0;
1865 int i;
1866
/* pass 1: gather zero-run lengths per run class */
1867 for(y=0; y<h; y++){
1868 int * pos = positions[ y&1];
1869 int *last_pos = positions[(y&1)^1];
1870 int * dist= distances[ y&1];
1871 int *last_dist= distances[(y&1)^1];
1872 int dist_index=0;
1873 int last_dist_index=0;
1874
1875 for(x=0; x<w; x++){
1876 int p=0, l=0, lt=0, t=0, rt=0;
1877 int v= src[x + y*stride];
1878
1879 if(y){
1880 t= src[x + (y-1)*stride];
1881 if(x){
1882 lt= src[x - 1 + (y-1)*stride];
1883 }
1884 if(x + 1 < w){
1885 rt= src[x + 1 + (y-1)*stride];
1886 }
1887 }
1888 if(x){
1889 l= src[x - 1 + y*stride];
1890 }
1891 if(parent){
1892 int px= x>>1;
1893 int py= y>>1;
1894 if(px<b->parent->width && py<b->parent->height)
1895 p= parent[px + py*2*stride];
1896 }
/* an entry of the previous row's list sits at this x: carry it over into
   the current row's list unless the last current entry already covers it */
1897 if(last_dist_index < dist_count && last_pos[last_dist_index] == x){
1898 if(dist_index==0 || x - pos[dist_index-1] > dist[dist_index-1] - last_dist[last_dist_index]){
1899 pos[dist_index]= x;
1900 dist[dist_index++]= last_dist[last_dist_index];
1901 }
1902 last_dist_index++;
1903 }
1904
1905 if(!(l|lt|t|rt|p)){
/* default distance when no nonzero coefficient has been recorded */
1906 int cur_dist=w>>1;
1907 int run_class;
1908
1909 if(last_dist_index < dist_count)
1910 cur_dist= last_pos[last_dist_index] - x + y - last_dist[last_dist_index];
1911 if(dist_index)
1912 cur_dist= FFMIN(cur_dist, x - pos[dist_index-1] + y - dist[dist_index-1]);
1913 assert(cur_dist>=2);
1914 run_class= av_log2(cur_dist+62);
1915
1916 if(v){
1917 runs[run_class][run_index[run_class]++]= run[run_class];
1918 run[run_class]=0;
1919 }else{
1920 run[run_class]++;
1921 }
1922 }
/* record this nonzero coefficient, dropping entries it makes redundant */
1923 if(v){
1924 while(dist_index && x - pos[dist_index-1] <= y - dist[dist_index-1])
1925 dist_index--;
1926 pos[dist_index]= x;
1927 dist[dist_index++]= y;
1928 }
1929 }
1930 dist_count= dist_index;
1931 }
/* terminate every class with its trailing run and rewind for pass 2.
   NOTE(review): only classes 0..11 are handled although the arrays have 16
   entries; presumably classes >= 12 (cur_dist >= 4034) cannot occur -
   confirm. */
1932 for(i=0; i<12; i++){
1933 runs[i][run_index[i]++]= run[i];
1934 run_index[i]=0;
1935 run[i]=0;
1936 }
1937
1938 dist_count=0;
1939
/* pass 2: emit symbols, repeating the same distance bookkeeping */
1940 for(y=0; y<h; y++){
1941 int * pos = positions[ y&1];
1942 int *last_pos = positions[(y&1)^1];
1943 int * dist= distances[ y&1];
1944 int *last_dist= distances[(y&1)^1];
1945 int dist_index=0;
1946 int last_dist_index=0;
1947
1948 for(x=0; x<w; x++){
1949 int p=0, l=0, lt=0, t=0, rt=0;
1950 int v= src[x + y*stride];
1951
1952 if(y){
1953 t= src[x + (y-1)*stride];
1954 if(x){
1955 lt= src[x - 1 + (y-1)*stride];
1956 }
1957 if(x + 1 < w){
1958 rt= src[x + 1 + (y-1)*stride];
1959 }
1960 }
1961 if(x){
1962 l= src[x - 1 + y*stride];
1963 }
1964 if(parent){
1965 int px= x>>1;
1966 int py= y>>1;
1967 if(px<b->parent->width && py<b->parent->height)
1968 p= parent[px + py*2*stride];
1969 }
1970 if(last_dist_index < dist_count && last_pos[last_dist_index] == x){
1971 if(dist_index==0 || x - pos[dist_index-1] > dist[dist_index-1] - last_dist[last_dist_index]){
1972 pos[dist_index]= x;
1973 dist[dist_index++]= last_dist[last_dist_index];
1974 }
1975 last_dist_index++;
1976 }
/* nonzero context: code a significance bit; zero context: code via runs */
1977 if(l|lt|t|rt|p){
1978 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1979
1980 put_cabac(&s->c, &b->state[0][context], !!v);
1981 }else{
1982 int cur_dist=w>>1;
1983 int run_class;
1984
1985 if(last_dist_index < dist_count)
1986 cur_dist= last_pos[last_dist_index] - x + y - last_dist[last_dist_index];
1987 if(dist_index)
1988 cur_dist= FFMIN(cur_dist, x - pos[dist_index-1] + y - dist[dist_index-1]);
1989 assert(cur_dist>=2);
1990 assert(!dist_index || (pos[dist_index-1] >= 0 && pos[dist_index-1] <w));
1991 assert(last_dist_index >= dist_count || (last_pos[last_dist_index] >= 0 && last_pos[last_dist_index] <w));
1992 assert(!dist_index || dist[dist_index-1] <= y);
1993 assert(last_dist_index >= dist_count || last_dist[last_dist_index] < y);
1994 assert(cur_dist <= y + FFMAX(x, w-x-1));
1995 run_class= av_log2(cur_dist+62);
1996
/* first use of this class: load and write its initial run length */
1997 if(!run_index[run_class]){
1998 run[run_class]= runs[run_class][run_index[run_class]++];
1999 put_symbol(&s->c, b->state[run_class+1], run[run_class], 0);
2000 }
2001 if(!run[run_class]){
2002 run[run_class]= runs[run_class][run_index[run_class]++];
2003 put_symbol(&s->c, b->state[run_class+1], run[run_class], 0);
2004 assert(v);
2005 }else{
2006 run[run_class]--;
2007 assert(!v);
2008 }
2009 }
/* nonzero coefficient: magnitude minus one, then sign, then record it */
2010 if(v){
2011 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
2012
2013 put_symbol(&s->c, b->state[context + 16], ABS(v)-1, 0);
2014 put_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
2015
2016 while(dist_index && x - pos[dist_index-1] <= y - dist[dist_index-1])
2017 dist_index--;
2018 pos[dist_index]= x;
2019 dist[dist_index++]= y;
2020 }
2021 }
2022 dist_count= dist_index;
2023 }
2024 }
2025}
2026
2027static void encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
2028// encode_subband_qtree(s, b, src, parent, stride, orientation);
2029// encode_subband_z0run(s, b, src, parent, stride, orientation);
2030 encode_subband_c0run(s, b, src, parent, stride, orientation);
2031// encode_subband_dzr(s, b, src, parent, stride, orientation);
2032}
2033
a8d73e56 2034static inline void decode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
791e7b83
MN
2035 const int level= b->level;
2036 const int w= b->width;
2037 const int h= b->height;
2038 int x,y;
2039
2040 START_TIMER
4f4e9633
MN
2041#if 0
2042 for(y=0; y<b->height; y++)
2043 memset(&src[y*stride], 0, b->width*sizeof(DWTELEM));
2044
2045 int plane;
2046 for(plane=24; plane>=0; plane--){
2047 int run;
2048
2049 run= get_symbol(&s->c, b->state[1], 0);
2050
2051#define HIDE(c, plane) c= c>=0 ? c&((-1)<<(plane)) : -((-c)&((-1)<<(plane)));
2052
2053 for(y=0; y<h; y++){
2054 for(x=0; x<w; x++){
2055 int v, p=0, lv;
2056 int /*ll=0, */l=0, lt=0, t=0, rt=0;
2057 int d=0, r=0, rd=0, ld=0;
2058 lv= src[x + y*stride];
2059
2060 if(y){
2061 t= src[x + (y-1)*stride];
2062 if(x){
2063 lt= src[x - 1 + (y-1)*stride];
2064 }
2065 if(x + 1 < w){
2066 rt= src[x + 1 + (y-1)*stride];
2067 }
2068 }
2069 if(x){
2070 l= src[x - 1 + y*stride];
2071 /*if(x > 1){
2072 if(orientation==1) ll= src[y + (x-2)*stride];
2073 else ll= src[x - 2 + y*stride];
2074 }*/
2075 }
2076 if(y+1<h){
2077 d= src[x + (y+1)*stride];
2078 if(x) ld= src[x - 1 + (y+1)*stride];
2079 if(x + 1 < w) rd= src[x + 1 + (y+1)*stride];
2080 }
2081 if(x + 1 < w)
2082 r= src[x + 1 + y*stride];
2083
2084 if(parent){
2085 int px= x>>1;
2086 int py= y>>1;
2087 if(px<b->parent->width && py<b->parent->height)
2088 p= parent[px + py*2*stride];
2089 }
2090 HIDE( p, plane)
2091 if(/*ll|*/l|lt|t|rt|r|rd|ld|d|p|lv){
2092 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p)
2093 +3*ABS(r) + ABS(rd) + 2*ABS(d) + ABS(ld));
2094
2095 if(lv){
2096 assert(context + 8*av_log2(ABS(lv)) < 512 - 100);
2097 if(get_cabac(&s->c, &b->state[99][context + 8*(av_log2(ABS(lv))-plane)])){
2098 if(lv<0) v= lv - (1<<plane);
2099 else v= lv + (1<<plane);
2100 }else
2101 v=lv;
2102 }else{
2103 v= get_cabac(&s->c, &b->state[ 0][context]) << plane;
2104 }
2105 }else{
2106 assert(!lv);
2107 if(!run){
2108 run= get_symbol(&s->c, b->state[1], 0);
2109 v= 1<<plane;
2110 }else{
2111 run--;
2112 v=0;
2113 }
2114 }
2115 if(v && !lv){
2116 int context= clip(quant3b[l&0xFF] + quant3b[r&0xFF], -1,1)
2117 + 3*clip(quant3b[t&0xFF] + quant3b[d&0xFF], -1,1);
2118 if(get_cabac(&s->c, &b->state[0][16 + 1 + 3 + context]))
2119 v= -v;
2120 }
2121 src[x + y*stride]= v;
2122 }
2123 }
2124 }
2125 return;
2126#endif
2127#if 0
2128 int tree[10][w*h]; //FIXME space waste ...
2129 int treedim[10][2];
2130 int lev;
2131 const int max_level= av_log2(2*FFMAX(w,h)-1);
2132 int w2=w, h2=h;
2133 memset(tree, 0, sizeof(tree));
791e7b83 2134
4f4e9633
MN
2135// assert(w%2==0 && h%2==0);
2136
2137 for(lev=max_level; lev>=0; lev--){
2138 treedim[lev][0]= w2;
2139 treedim[lev][1]= h2;
2140 w2= (w2+1)>>1;
2141 h2= (h2+1)>>1;
2142 }
2143
2144 for(lev=0; lev<=max_level; lev++){
2145 w2= treedim[lev][0];
2146 h2= treedim[lev][1];
2147 for(y=0; y<h2; y++){
2148 for(x=0; x<w2; x++){
2149 int l= 0, t=0;
2150 int context;
2151 if(lev && !tree[lev-1][x/2 + y/2*w])
2152 continue;
2153
2154 if(x) l= tree[lev][x - 1 + y*w];
2155 if(y) t= tree[lev][x + (y-1)*w];
2156
2157 context= lev + 8*(!!l) + 16*(!!t);
2158 tree[lev][x + y*w]= get_cabac(&s->c, &b->state[98][context]);
2159 }
2160 }
2161 }
2162 if(1){
2163 for(y=0; y<b->height; y++)
2164 memset(&src[y*stride], 0, b->width*sizeof(DWTELEM));
2165
2166 for(y=0; y<h; y++){
2167 for(x=0; x<w; x++){
2168 int v, p=0;
2169 int /*ll=0, */l=0, lt=0, t=0, rt=0;
2170
2171 if(y){
2172 t= src[x + (y-1)*stride];
2173 if(x){
2174 lt= src[x - 1 + (y-1)*stride];
2175 }
2176 if(x + 1 < w){
2177 rt= src[x + 1 + (y-1)*stride];
2178 }
2179 }
2180 if(x){
2181 l= src[x - 1 + y*stride];
2182 /*if(x > 1){
2183 if(orientation==1) ll= src[y + (x-2)*stride];
2184 else ll= src[x - 2 + y*stride];
2185 }*/
2186 }
2187 if(parent){
2188 int px= x>>1;
2189 int py= y>>1;
2190 if(px<b->parent->width && py<b->parent->height)
2191 p= parent[px + py*2*stride];
2192 }
2193 if(tree[max_level][x + y*w]){
2194 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
2195 v= get_symbol(&s->c, b->state[context + 2], 0) + 1;
2196 if(get_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]]))
2197 v= -v;
2198 src[x + y*stride]= v;
2199 }
2200 }
2201 }
2202 if(level+1 == s->spatial_decomposition_count){
2203 STOP_TIMER("decode_subband")
2204 }
2205
2206 return;
2207 }
2208#endif
791e7b83 2209 if(1){
791e7b83 2210 int run;
791e7b83
MN
2211
2212 for(y=0; y<b->height; y++)
2213 memset(&src[y*stride], 0, b->width*sizeof(DWTELEM));
2214
4f4e9633 2215 run= get_symbol2(&s->c, b->state[1], 3);
791e7b83
MN
2216 for(y=0; y<h; y++){
2217 for(x=0; x<w; x++){
78486403 2218 int v, p=0;
6b2f6646 2219 int /*ll=0, */l=0, lt=0, t=0, rt=0;
791e7b83
MN
2220
2221 if(y){
a8d73e56 2222 t= src[x + (y-1)*stride];
791e7b83 2223 if(x){
a8d73e56 2224 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
2225 }
2226 if(x + 1 < w){
a8d73e56 2227 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
2228 }
2229 }
2230 if(x){
a8d73e56 2231 l= src[x - 1 + y*stride];
6b2f6646
MN
2232 /*if(x > 1){
2233 if(orientation==1) ll= src[y + (x-2)*stride];
2234 else ll= src[x - 2 + y*stride];
791e7b83
MN
2235 }*/
2236 }
78486403 2237 if(parent){
a8d73e56
MN
2238 int px= x>>1;
2239 int py= y>>1;
78486403
MN
2240 if(px<b->parent->width && py<b->parent->height)
2241 p= parent[px + py*2*stride];
2242 }
2243 if(/*ll|*/l|lt|t|rt|p){
2244 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
6b2f6646
MN
2245
2246 v=get_cabac(&s->c, &b->state[0][context]);
791e7b83
MN
2247 }else{
2248 if(!run){
4f4e9633 2249 run= get_symbol2(&s->c, b->state[1], 3);
791e7b83
MN
2250 //FIXME optimize this here
2251 //FIXME try to store a more naive run
2252 v=1;
2253 }else{
2254 run--;
2255 v=0;
2256 }
2257 }
2258 if(v){
78486403 2259 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
0635cbfc 2260 v= get_symbol2(&s->c, b->state[context + 2], context-4) + 1;
791e7b83
MN
2261 if(get_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]]))
2262 v= -v;
a8d73e56 2263 src[x + y*stride]= v;
791e7b83
MN
2264 }
2265 }
2266 }
2267 if(level+1 == s->spatial_decomposition_count){
2268 STOP_TIMER("decode_subband")
2269 }
2270
2271 return;
2272 }
791e7b83
MN
2273}
2274
2275static void reset_contexts(SnowContext *s){
2276 int plane_index, level, orientation;
2277
2278 for(plane_index=0; plane_index<2; plane_index++){
2279 for(level=0; level<s->spatial_decomposition_count; level++){
2280 for(orientation=level ? 1:0; orientation<4; orientation++){
2281 memset(s->plane[plane_index].band[level][orientation].state, 0, sizeof(s->plane[plane_index].band[level][orientation].state));
2282 }
2283 }
2284 }
2285 memset(s->mb_band.state, 0, sizeof(s->mb_band.state));
2286 memset(s->mv_band[0].state, 0, sizeof(s->mv_band[0].state));
2287 memset(s->mv_band[1].state, 0, sizeof(s->mv_band[1].state));
2288 memset(s->header_state, 0, sizeof(s->header_state));
2289}
2290
/**
 * Separable sub-pixel interpolation of one block.
 *
 * A horizontal pass filters b_h+5 rows of src into tmp, then a vertical pass
 * filters tmp into dst. Each pass blends the sample at tap 2 (dx/dy < 8) or
 * tap 3 (dx/dy >= 8) with the 6-tap (1,-5,20,20,-5,1) half-sample value
 * according to the fractional offset, so an offset of 0 reproduces tap 2.
 *
 * The filter can overshoot the 8-bit range, so both passes clamp to 0..255
 * before storing. Without the clamp the value wrapped when it was narrowed to
 * uint8_t (e.g. 287 was stored as 31).
 *
 * @param dst    output, b_w x b_h samples, rows stride apart
 * @param src    input; columns x..x+5 of rows 0..b_h+4 are read, so it must
 *               point 2 samples left of and 2 rows above the block
 * @param tmp    scratch for the horizontal pass, b_h+5 rows of stride bytes
 * @param stride row pitch shared by dst, src and tmp
 * @param b_w    block width
 * @param b_h    block height
 * @param dx     horizontal fractional offset (callers in this file pass 0..15)
 * @param dy     vertical fractional offset (callers in this file pass 0..15)
 */
static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
    int x, y;

    /* horizontal pass; 5 extra rows feed the vertical 6-tap filter */
    for(y=0; y < b_h+5; y++){
        for(x=0; x < b_w; x++){
            int a0= src[x     + y*stride];
            int a1= src[x + 1 + y*stride];
            int a2= src[x + 2 + y*stride];
            int a3= src[x + 3 + y*stride];
            int a4= src[x + 4 + y*stride];
            int a5= src[x + 5 + y*stride];
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
            int v;

            if(dx<8) v= (32*a2*( 8-dx) +    am* dx    + 128)>>8;
            else     v= (   am*(16-dx) + 32*a3*(dx-8) + 128)>>8;

            if(v&~255) v= v<0 ? 0 : 255;
            tmp[x + y*stride]= v;
        }
    }
    /* vertical pass */
    for(y=0; y < b_h; y++){
        for(x=0; x < b_w; x++){
            int a0= tmp[x +  y     *stride];
            int a1= tmp[x + (y + 1)*stride];
            int a2= tmp[x + (y + 2)*stride];
            int a3= tmp[x + (y + 3)*stride];
            int a4= tmp[x + (y + 4)*stride];
            int a5= tmp[x + (y + 5)*stride];
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
            int v;

            if(dy<8) v= (32*a2*( 8-dy) +    am* dy    + 128)>>8;
            else     v= (   am*(16-dy) + 32*a3*(dy-8) + 128)>>8;

            if(v&~255) v= v<0 ? 0 : 255;
            dst[x + y*stride]= v;
        }
    }
}
2344
/*
 * mcb(dx,dy,size) defines mc_block<dx><dy>(dst, src, stride): a size x size
 * call of mc_block() with the fractional offset fixed to (dx,dy).
 * src addresses the block itself; the 2-sample / 2-row lead-in that mc_block()
 * expects is subtracted here. The scratch rows live on the stack.
 */
#define mcb(dx,dy,size)\
static void mc_block ## dx ## dy(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t scratch[stride*(size+5)];\
    mc_block(dst, src - (2 + 2*stride), scratch, stride, size, size, dx, dy);\
}

/* one function per offset pair, dx and dy each in {0,4,8,12} */
mcb( 0, 0,16)
mcb( 4, 0,16)
mcb( 8, 0,16)
mcb(12, 0,16)
mcb( 0, 4,16)
mcb( 4, 4,16)
mcb( 8, 4,16)
mcb(12, 4,16)
mcb( 0, 8,16)
mcb( 4, 8,16)
mcb( 8, 8,16)
mcb(12, 8,16)
mcb( 0,12,16)
mcb( 4,12,16)
mcb( 8,12,16)
mcb(12,12,16)
2367
/*
 * mca(dx,dy,size) defines mc_block_hpel<dx><dy>(dst, src, stride, h): the same
 * wrapper as the mc_block<dx><dy> family but with an extra height argument,
 * which must equal size (asserted).
 */
#define mca(dx,dy,size)\
static void mc_block_hpel ## dx ## dy(uint8_t *dst, uint8_t *src, int stride, int h){\
    uint8_t scratch[stride*(size+5)];\
    assert(h==size);\
    mc_block(dst, src - (2 + 2*stride), scratch, stride, size, size, dx, dy);\
}

/* dx and dy each in {0,8} */
mca( 0, 0,16)
mca( 8, 0,16)
mca( 0, 8,16)
mca( 8, 8,16)
2379
2380static void add_xblock(DWTELEM *dst, uint8_t *src, uint8_t *obmc, int s_x, int s_y, int b_w, int b_h, int mv_x, int mv_y, int w, int h, int dst_stride, int src_stride, int obmc_stride, int mb_type, int add){
2381 uint8_t tmp[src_stride*(b_h+5)]; //FIXME move to context to gurantee alignment
2382 int x,y;
2383
2384 if(s_x<0){
2385 obmc -= s_x;
2386 b_w += s_x;
2387 s_x=0;
2388 }else if(s_x + b_w > w){
2389 b_w = w - s_x;
2390 }
2391 if(s_y<0){
2392 obmc -= s_y*obmc_stride;
2393 b_h += s_y;
2394 s_y=0;
2395 }else if(s_y + b_h> h){
2396 b_h = h - s_y;
2397 }
2398
620ab797
MN
2399 if(b_w<=0 || b_h<=0) return;
2400
791e7b83
MN
2401 dst += s_x + s_y*dst_stride;
2402
2403 if(mb_type==1){
2404 src += s_x + s_y*src_stride;
2405 for(y=0; y < b_h; y++){
2406 for(x=0; x < b_w; x++){
2407 if(add) dst[x + y*dst_stride] += obmc[x + y*obmc_stride] * 128 * (256/OBMC_MAX);
2408 else dst[x + y*dst_stride] -= obmc[x + y*obmc_stride] * 128 * (256/OBMC_MAX);
2409 }
2410 }
2411 }else{
2412 int dx= mv_x&15;
2413 int dy= mv_y&15;
2414// int dxy= (mv_x&1) + 2*(mv_y&1);
2415
2416 s_x += (mv_x>>4) - 2;
2417 s_y += (mv_y>>4) - 2;
2418 src += s_x + s_y*src_stride;
2419 //use dsputil
2420
2421 if( (unsigned)s_x >= w - b_w - 4
2422 || (unsigned)s_y >= h - b_h - 4){
2423 ff_emulated_edge_mc(tmp + 32, src, src_stride, b_w+5, b_h+5, s_x, s_y, w, h);
2424 src= tmp + 32;
2425 }
2426
2427 if(mb_type==0){
2428 mc_block(tmp, src, tmp + 64+8, src_stride, b_w, b_h, dx, dy);
2429 }else{
2430 int sum=0;
2431 for(y=0; y < b_h; y++){
2432 for(x=0; x < b_w; x++){
2433 sum += src[x+ y*src_stride];
2434 }
2435 }
2436 sum= (sum + b_h*b_w/2) / (b_h*b_w);
2437 for(y=0; y < b_h; y++){
2438 for(x=0; x < b_w; x++){
2439 tmp[x + y*src_stride]= sum;
2440 }
2441 }
2442 }
2443
2444 for(y=0; y < b_h; y++){
2445 for(x=0; x < b_w; x++){
2446 if(add) dst[x + y*dst_stride] += obmc[x + y*obmc_stride] * tmp[x + y*src_stride] * (256/OBMC_MAX);
2447 else dst[x + y*dst_stride] -= obmc[x + y*obmc_stride] * tmp[x + y*src_stride] * (256/OBMC_MAX);
2448 }
2449 }
2450 }
2451}
2452
2453static void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2454 Plane *p= &s->plane[plane_index];
2455 const int mb_w= s->mb_band.width;
2456 const int mb_h= s->mb_band.height;
2457 const int mb_stride= s->mb_band.stride;
2458 int x, y, mb_x, mb_y;
2459 int scale = plane_index ? s->mv_scale : 2*s->mv_scale;
2460 int block_w = plane_index ? 8 : 16;
2461 uint8_t *obmc = plane_index ? obmc16 : obmc32;
2462 int obmc_stride= plane_index ? 16 : 32;
2463 int ref_stride= s->last_picture.linesize[plane_index];
2464 uint8_t *ref = s->last_picture.data[plane_index];
2465 int w= p->width;
2466 int h= p->height;
2467
2468if(s->avctx->debug&512){
2469 for(y=0; y<h; y++){
2470 for(x=0; x<w; x++){
2471 if(add) buf[x + y*w]+= 128*256;
2472 else buf[x + y*w]-= 128*256;
2473 }
2474 }
2475
2476 return;
2477}
2478 for(mb_y=-1; mb_y<=mb_h; mb_y++){
2479 for(mb_x=-1; mb_x<=mb_w; mb_x++){
2480 int index= clip(mb_x, 0, mb_w-1) + clip(mb_y, 0, mb_h-1)*mb_stride;
2481
2482 add_xblock(buf, ref, obmc,
2483 block_w*mb_x - block_w/2,
2484 block_w*mb_y - block_w/2,
2485 2*block_w, 2*block_w,
2486 s->mv_band[0].buf[index]*scale, s->mv_band[1].buf[index]*scale,
2487 w, h,
2488 w, ref_stride, obmc_stride,
2489 s->mb_band.buf[index], add);
2490
2491 }
2492 }
2493}
2494
2495static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
2496 const int level= b->level;
2497 const int w= b->width;
2498 const int h= b->height;
2499 const int qlog= clip(s->qlog + b->qlog, 0, 128);
2500 const int qmul= qexp[qlog&7]<<(qlog>>3);
da66b631
MN
2501 int x,y, thres1, thres2;
2502 START_TIMER
791e7b83
MN
2503
2504 assert(QROOT==8);
2505
2506 bias= bias ? 0 : (3*qmul)>>3;
da66b631
MN
2507 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
2508 thres2= 2*thres1;
791e7b83
MN
2509
2510 if(!bias){
2511 for(y=0; y<h; y++){
2512 for(x=0; x<w; x++){
da66b631
MN
2513 int i= src[x + y*stride];
2514
2515 if((unsigned)(i+thres1) > thres2){
2516 if(i>=0){
2517 i<<= QEXPSHIFT;
2518 i/= qmul; //FIXME optimize
2519 src[x + y*stride]= i;
2520 }else{
2521 i= -i;
2522 i<<= QEXPSHIFT;
2523 i/= qmul; //FIXME optimize
2524 src[x + y*stride]= -i;
2525 }
2526 }else
2527 src[x + y*stride]= 0;
791e7b83
MN
2528 }
2529 }
2530 }else{
2531 for(y=0; y<h; y++){
2532 for(x=0; x<w; x++){
2533 int i= src[x + y*stride];
2534
da66b631
MN
2535 if((unsigned)(i+thres1) > thres2){
2536 if(i>=0){
2537 i<<= QEXPSHIFT;
2538 i= (i + bias) / qmul; //FIXME optimize
2539 src[x + y*stride]= i;
2540 }else{
2541 i= -i;
2542 i<<= QEXPSHIFT;
2543 i= (i + bias) / qmul; //FIXME optimize
2544 src[x + y*stride]= -i;
2545 }
2546 }else
2547 src[x + y*stride]= 0;
791e7b83
MN
2548 }
2549 }
2550 }
da66b631
MN
2551 if(level+1 == s->spatial_decomposition_count){
2552// STOP_TIMER("quantize")
2553 }
791e7b83
MN
2554}
2555
2556static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
2557 const int level= b->level;
2558 const int w= b->width;
2559 const int h= b->height;
2560 const int qlog= clip(s->qlog + b->qlog, 0, 128);
2561 const int qmul= qexp[qlog&7]<<(qlog>>3);
2562 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2563 int x,y;
2564
2565 assert(QROOT==8);
2566
2567 for(y=0; y<h; y++){
2568 for(x=0; x<w; x++){
2569 int i= src[x + y*stride];
2570 if(i<0){
2571 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2572 }else if(i>0){
2573 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
2574 }
2575 }
2576 }
2577}
2578
2579static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2580 const int w= b->width;
2581 const int h= b->height;
2582 int x,y;
2583
2584 for(y=h-1; y>=0; y--){
2585 for(x=w-1; x>=0; x--){
2586 int i= x + y*stride;
2587
2588 if(x){
2589 if(use_median){
2590 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
2591 else src[i] -= src[i - 1];
2592 }else{
2593 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
2594 else src[i] -= src[i - 1];
2595 }
2596 }else{
2597 if(y) src[i] -= src[i - stride];
2598 }
2599 }
2600 }
2601}
2602
2603static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2604 const int w= b->width;
2605 const int h= b->height;
2606 int x,y;
2607
2608 for(y=0; y<h; y++){
2609 for(x=0; x<w; x++){
2610 int i= x + y*stride;
2611
2612 if(x){
2613 if(use_median){
2614 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
2615 else src[i] += src[i - 1];
2616 }else{
2617 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
2618 else src[i] += src[i - 1];
2619 }
2620 }else{
2621 if(y) src[i] += src[i - stride];
2622 }
2623 }
2624 }
2625}
2626
2627static void encode_header(SnowContext *s){
2628 int plane_index, level, orientation;
2629
2630 put_cabac(&s->c, s->header_state, s->keyframe); // state clearing stuff?
2631 if(s->keyframe){
2632 put_symbol(&s->c, s->header_state, s->version, 0);
2633 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
2634 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
2635 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
2636 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
2637 put_symbol(&s->c, s->header_state, s->b_width, 0);
2638 put_symbol(&s->c, s->header_state, s->b_height, 0);
2639 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
2640 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
2641 put_cabac(&s->c, s->header_state, s->spatial_scalability);
2642// put_cabac(&s->c, s->header_state, s->rate_scalability);
2643
2644 for(plane_index=0; plane_index<2; plane_index++){
2645 for(level=0; level<s->spatial_decomposition_count; level++){
2646 for(orientation=level ? 1:0; orientation<4; orientation++){
2647 if(orientation==2) continue;
2648 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
2649 }
2650 }
2651 }
2652 }
2653 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
2654 put_symbol(&s->c, s->header_state, s->qlog, 1);
2655 put_symbol(&s->c, s->header_state, s->mv_scale, 0);
2656 put_symbol(&s->c, s->header_state, s->qbias, 1);
2657}
2658
2659static int decode_header(SnowContext *s){
2660 int plane_index, level, orientation;
2661
2662 s->keyframe= get_cabac(&s->c, s->header_state);
2663 if(s->keyframe){
2664 s->version= get_symbol(&s->c, s->header_state, 0);
2665 if(s->version>0){
2666 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
2667 return -1;
2668 }
2669 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
2670 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
2671 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
2672 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
2673 s->b_width= get_symbol(&s->c, s->header_state, 0);
2674 s->b_height= get_symbol(&s->c, s->header_state, 0);
2675 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
2676 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
2677 s->spatial_scalability= get_cabac(&s->c, s->header_state);
2678// s->rate_scalability= get_cabac(&s->c, s->header_state);
2679
2680 for(plane_index=0; plane_index<3; plane_index++){
2681 for(level=0; level<s->spatial_decomposition_count; level++){
2682 for(orientation=level ? 1:0; orientation<4; orientation++){
2683 int q;
2684 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
2685 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
2686 else q= get_symbol(&s->c, s->header_state, 1);
2687 s->plane[plane_index].band[level][orientation].qlog= q;
2688 }
2689 }
2690 }
2691 }
2692
2693 s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
2694 if(s->spatial_decomposition_type > 2){
2695 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
2696 return -1;
2697 }
2698
2699 s->qlog= get_symbol(&s->c, s->header_state, 1);
2700 s->mv_scale= get_symbol(&s->c, s->header_state, 0);
2701 s->qbias= get_symbol(&s->c, s->header_state, 1);
2702
2703 return 0;
2704}
2705
2706static int common_init(AVCodecContext *avctx){
2707 SnowContext *s = avctx->priv_data;
2708 int width, height;
2709 int level, orientation, plane_index, dec;
2710
2711 s->avctx= avctx;
2712
2713 dsputil_init(&s->dsp, avctx);
2714
2715#define mcf(dx,dy)\
2716 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
2717 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
2718 mc_block ## dx ## dy;
2719
2720 mcf( 0, 0)
2721 mcf( 4, 0)
2722 mcf( 8, 0)
2723 mcf(12, 0)
2724 mcf( 0, 4)
2725 mcf( 4, 4)
2726 mcf( 8, 4)
2727 mcf(12, 4)
2728 mcf( 0, 8)
2729 mcf( 4, 8)
2730 mcf( 8, 8)
2731 mcf(12, 8)
2732 mcf( 0,12)
2733 mcf( 4,12)
2734 mcf( 8,12)
2735 mcf(12,12)
2736
2737#define mcfh(dx,dy)\
2738 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
2739 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
2740 mc_block_hpel ## dx ## dy;
2741
2742 mcfh(0, 0)
2743 mcfh(8, 0)
2744 mcfh(0, 8)
2745 mcfh(8, 8)
2746
2747 dec= s->spatial_decomposition_count= 5;
2748 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
2749
2750 s->chroma_h_shift= 1; //FIXME XXX
2751 s->chroma_v_shift= 1;
2752
2753// dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
2754
2755 s->b_width = (s->avctx->width +(1<<dec)-1)>>dec;
2756 s->b_height= (s->avctx->height+(1<<dec)-1)>>dec;
2757
2758 s->spatial_dwt_buffer= av_mallocz(s->b_width*s->b_height*sizeof(DWTELEM)<<(2*dec));
2759 s->pred_buffer= av_mallocz(s->b_width*s->b_height*sizeof(DWTELEM)<<(2*dec));
2760
2761 s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
2762
2763 for(plane_index=0; plane_index<3; plane_index++){
2764 int w= s->avctx->width;
2765 int h= s->avctx->height;
2766
2767 if(plane_index){
2768 w>>= s->chroma_h_shift;
2769 h>>= s->chroma_v_shift;
2770 }
2771 s->plane[plane_index].width = w;
2772 s->plane[plane_index].height= h;
2773av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
2774 for(level=s->spatial_decomposition_count-1; level>=0; level--){
2775 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2776 SubBand *b= &s->plane[plane_index].band[level][orientation];
2777
2778 b->buf= s->spatial_dwt_buffer;
2779 b->level= level;
2780 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
2781 b->width = (w + !(orientation&1))>>1;
2782 b->height= (h + !(orientation>1))>>1;
2783
2784 if(orientation&1) b->buf += (w+1)>>1;
2785 if(orientation>1) b->buf += b->stride>>1;
2786
2787 if(level)
2788 b->parent= &s->plane[plane_index].band[level-1][orientation];
2789 }
2790 w= (w+1)>>1;
2791 h= (h+1)>>1;
2792 }
2793 }
2794
2795 //FIXME init_subband() ?
2796 s->mb_band.stride= s->mv_band[0].stride= s->mv_band[1].stride=
2797 s->mb_band.width = s->mv_band[0].width = s->mv_band[1].width = (s->avctx->width + 15)>>4;
2798 s->mb_band.height= s->mv_band[0].height= s->mv_band[1].height= (s->avctx->height+ 15)>>4;
2799 s->mb_band .buf= av_mallocz(s->mb_band .stride * s->mb_band .height*sizeof(DWTELEM));
2800 s->mv_band[0].buf= av_mallocz(s->mv_band[0].stride * s->mv_band[0].height*sizeof(DWTELEM));
2801 s->mv_band[1].buf= av_mallocz(s->mv_band[1].stride * s->mv_band[1].height*sizeof(DWTELEM));
2802
2803 reset_contexts(s);
2804/*
2805 width= s->width= avctx->width;
2806 height= s->height= avctx->height;
2807
2808 assert(width && height);
2809*/
2810 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
2811
2812 return 0;
2813}
2814
2815
2816static void calculate_vissual_weight(SnowContext *s, Plane *p){
2817 int width = p->width;
2818 int height= p->height;
2819 int i, level, orientation, x, y;
2820
2821 for(level=0; level<s->spatial_decomposition_count; level++){
2822 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2823 SubBand *b= &p->band[level][orientation];
2824 DWTELEM *buf= b->buf;
2825 int64_t error=0;
2826
2827 memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
2828 buf[b->width/2 + b->height/2*b->stride]= 256*256;
2829 spatial_idwt(s, s->spatial_dwt_buffer, width, height, width);
2830 for(y=0; y<height; y++){
2831 for(x=0; x<width; x++){
2832 int64_t d= s->spatial_dwt_buffer[x + y*width];
2833 error += d*d;
2834 }
2835 }
2836
2837 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
2838 av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
2839 }
2840 }
2841}
2842
2843static int encode_init(AVCodecContext *avctx)
2844{
2845 SnowContext *s = avctx->priv_data;
2846 int i;
2847 int level, orientation, plane_index;
2848
2ff9ff5b
MN
2849 if(avctx->strict_std_compliance >= 0){
2850 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it wont be decodeable with future versions!!!\n"
2851 "use vstrict=-1 to use it anyway\n");
2852 return -1;
2853 }
2854
791e7b83
MN
2855 common_init(avctx);
2856
2857 s->version=0;
2858
2859 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
2860 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2861 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2862 s->mb_type = av_mallocz((s->mb_band.width+1)*s->mb_band.height*sizeof(int16_t));
2863 s->mb_mean = av_mallocz((s->mb_band.width+1)*s->mb_band.height*sizeof(int8_t ));
2864 s->dummy = av_mallocz((s->mb_band.width+1)*s->mb_band.height*sizeof(int32_t));
2865 h263_encode_init(&s->m); //mv_penalty
2866
2867 for(plane_index=0; plane_index<3; plane_index++){
2868 calculate_vissual_weight(s, &s->plane[plane_index]);
2869 }
2870
2871
2872 avctx->coded_frame= &s->current_picture;
2873 switch(avctx->pix_fmt){
2874// case PIX_FMT_YUV444P:
2875// case PIX_FMT_YUV422P:
2876 case PIX_FMT_YUV420P:
2877 case PIX_FMT_GRAY8:
2878// case PIX_FMT_YUV411P:
2879// case PIX_FMT_YUV410P:
2880 s->colorspace_type= 0;
2881 break;
2882/* case PIX_FMT_RGBA32:
2883 s->colorspace= 1;
2884 break;*/
2885 default:
2886 av_log(avctx, AV_LOG_ERROR, "format not supported\n");
2887 return -1;
2888 }
2889// avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
2890 s->chroma_h_shift= 1;
2891 s->chroma_v_shift= 1;
2892 return 0;
2893}
2894
2895static int frame_start(SnowContext *s){
2896 AVFrame tmp;
2897
2898 if(s->keyframe)
2899 reset_contexts(s);
2900
2901 tmp= s->last_picture;
2902 s->last_picture= s->current_picture;
2903 s->current_picture= tmp;
2904
2905 s->current_picture.reference= 1;
2906 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
2907 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
2908 return -1;
2909 }
2910
2911 return 0;
2912}
2913
2914static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
2915 SnowContext *s = avctx->priv_data;
2916 CABACContext * const c= &s->c;
2917 AVFrame *pict = data;
2918 const int width= s->avctx->width;
2919 const int height= s->avctx->height;
2920 int used_count= 0;
2921 int log2_threshold, level, orientation, plane_index, i;
2922
791e7b83
MN
2923 ff_init_cabac_encoder(c, buf, buf_size);
2924 ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2925
2926 s->input_picture = *pict;
2927
2928 memset(s->header_state, 0, sizeof(s->header_state));
2929
2930 s->keyframe=avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
2931 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
2932
e071139a 2933 s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
791e7b83
MN
2934 //<64 >60
2935 s->qlog += 61;
2936
2937 for(i=0; i<s->mb_band.stride * s->mb_band.height; i++){
2938 s->mb_band.buf[i]= s->keyframe;
2939 }
2940
2941 frame_start(s);
2942
2943 if(pict->pict_type == P_TYPE){
2944 int block_width = (width +15)>>4;
2945 int block_height= (height+15)>>4;
2946 int stride= s->current_picture.linesize[0];
2947 uint8_t *src_plane= s->input_picture.data[0];
2948 int src_stride= s->input_picture.linesize[0];
2949 int x,y;
2950
2951 assert(s->current_picture.data[0]);
2952 assert(s->last_picture.data[0]);
2953
2954 s->m.avctx= s->avctx;
2955 s->m.current_picture.data[0]= s->current_picture.data[0];
2956 s->m. last_picture.data[0]= s-> last_picture.data[0];
2957 s->m. new_picture.data[0]= s-> input_picture.data[0];
2958 s->m.current_picture_ptr= &s->m.current_picture;
2959 s->m. last_picture_ptr= &s->m. last_picture;
2960 s->m.linesize=
2961 s->m. last_picture.linesize[0]=
2962 s->m. new_picture.linesize[0]=
2963 s->m.current_picture.linesize[0]= stride;
2964 s->m.width = width;
2965 s->m.height= height;
2966 s->m.mb_width = block_width;
2967 s->m.mb_height= block_height;
2968 s->m.mb_stride= s->m.mb_width+1;
2969 s->m.b8_stride= 2*s->m.mb_width+1;
2970 s->m.f_code=1;
2971 s->m.pict_type= pict->pict_type;
2972 s->m.me_method= s->avctx->me_method;
2973 s->m.me.scene_change_score=0;
2974 s->m.flags= s->avctx->flags;
2975 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
2976 s->m.out_format= FMT_H263;
2977 s->m.unrestricted_mv= 1;
2978
2979 s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
2980 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
2981 s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
2982
2983 if(!s->motion_val8){
2984 s->motion_val8 = av_mallocz(s->m.b8_stride*block_height*2*2*sizeof(int16_t));
2985 s->motion_val16= av_mallocz(s->m.mb_stride*block_height*2*sizeof(int16_t));
2986 }
2987
2988 s->m.mb_type= s->mb_type;
2989
2990 //dummies, to avoid segfaults
2991 s->m.current_picture.mb_mean = s->mb_mean;
2992 s->m.current_picture.mb_var = (int16_t*)s->dummy;
2993 s->m.current_picture.mc_mb_var= (int16_t*)s->dummy;
2994 s->m.current_picture.mb_type = s->dummy;
2995
2996 s->m.current_picture.motion_val[0]= s->motion_val8;
2997 s->m.p_mv_table= s->motion_val16;
2998 s->m.dsp= s->dsp; //move
2999 ff_init_me(&s->m);
3000
3001
3002 s->m.me.pre_pass=1;
3003 s->m.me.dia_size= s->avctx->pre_dia_size;
3004 s->m.first_slice_line=1;
3005 for(y= block_height-1; y >= 0; y--) {
3006 uint8_t src[stride*16];
3007
3008 s->m.new_picture.data[0]= src - y*16*stride; //ugly
3009 s->m.mb_y= y;
3010 for(i=0; i<16 && i + 16*y<height; i++){
3011 memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
3012 for(x=width; x<16*block_width; x++)
3013 src[i*stride+x]= src[i*stride+x-1];
3014 }
3015 for(; i<16 && i + 16*y<16*block_height; i++)
3016 memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
3017
3018 for(x=block_width-1; x >=0 ;x--) {
3019 s->m.mb_x= x;
3020 ff_init_block_index(&s->m);
3021 ff_update_block_index(&s->m);
3022 ff_pre_estimate_p_frame_motion(&s->m, x, y);
3023 }
3024 s->m.first_slice_line=0;
3025 }
3026 s->m.me.pre_pass=0;
3027
3028
3029 s->m.me.dia_size= s->avctx->dia_size;
3030 s->m.first_slice_line=1;
3031 for (y = 0; y < block_height; y++) {
3032 uint8_t src[stride*16];
3033
3034 s->m.new_picture.data[0]= src - y*16*stride; //ugly
3035 s->m.mb_y= y;
3036
3037 assert(width <= stride);
3038 assert(width <= 16*block_width);
3039
3040 for(i=0; i<16 && i + 16*y<height; i++){
3041 memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
3042 for(x=width; x<16*block_width; x++)
3043 src[i*stride+x]= src[i*stride+x-1];
3044 }
3045 for(; i<16 && i + 16*y<16*block_height; i++)
3046 memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
3047
3048 for (x = 0; x < block_width; x++) {
3049 int mb_xy= x + y*(s->mb_band.stride);
3050 s->m.mb_x= x;
3051 ff_init_block_index(&s->m);
3052 ff_update_block_index(&s->m);
3053
3054 ff_estimate_p_frame_motion(&s->m, x, y);
3055
3056 s->mb_band .buf[mb_xy]= (s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTER)
3057 ? 0 : 2;
3058 s->mv_band[0].buf[mb_xy]= s->motion_val16[x + y*s->m.mb_stride][0];
3059 s->mv_band[1].buf[mb_xy]= s->motion_val16[x + y*s->m.mb_stride][1];
3060
3061 if(s->mb_band .buf[x + y*(s->mb_band.stride)]==2 && 0){
3062 int dc0=128, dc1=128, dc, dc2, dir;
3063 int offset= (s->avctx->flags & CODEC_FLAG_QPEL) ? 64 : 32;
3064
3065 dc =s->mb_mean[x + y *s->m.mb_stride ];
3066 if(x) dc0=s->mb_mean[x + y *s->m.mb_stride - 1];
3067 if(y) dc1=s->mb_mean[x + (y-1)*s->m.mb_stride ];
3068 dc2= (dc0+dc1)>>1;
3069#if 0
3070 if (ABS(dc0 - dc) < ABS(dc1 - dc) && ABS(dc0 - dc) < ABS(dc2 - dc))
3071 dir= 1;
3072 else if(ABS(dc0 - dc) >=ABS(dc1 - dc) && ABS(dc1 - dc) < ABS(dc2 - dc))
3073 dir=-1;
3074 else
3075 dir=0;
3076#endif
3077 if(ABS(dc0 - dc) < ABS(dc1 - dc) && x){
3078 s->mv_band[0].buf[mb_xy]= s->mv_band[0].buf[x + y*(s->mb_band.stride)-1] - offset;
3079 s->mv_band[1].buf[mb_xy]= s->mv_band[1].buf[x + y*(s->mb_band.stride)-1];
3080 s->mb_mean[x + y *s->m.mb_stride ]= dc0;
3081 }else if(y){
3082 s->mv_band[0].buf[mb_xy]= s->mv_band[0].buf[x + (y-1)*(s->mb_band.stride)];
3083 s->mv_band[1].buf[mb_xy]= s->mv_band[1].buf[x + (y-1)*(s->mb_band.stride)] - offset;
3084 s->mb_mean[x + y *s->m.mb_stride ]= dc1;
3085 }
3086 }
3087// s->mb_band .buf[x + y*(s->mb_band.stride)]=1; //FIXME intra only test
3088 }
3089 s->m.first_slice_line=0;
3090 }
3091 assert(s->m.pict_type == P_TYPE);
3092 if(s->m.me.scene_change_score > s->avctx->scenechange_threshold){
3093 s->m.pict_type=
3094 pict->pict_type =I_TYPE;
3095 for(i=0; i<s->mb_band.stride * s->mb_band.height; i++){
3096 s->mb_band.buf[i]= 1;
3097 s->mv_band[0].buf[i]=
3098 s->mv_band[1].buf[i]= 0;
3099 }
3100 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3101 }
3102 }
3103
3104 s->m.first_slice_line=1;
3105
3106 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
3107
3108 encode_header(s);
3109
3110 decorrelate(s, &s->mb_band , s->mb_band .buf, s->mb_band .stride, 0, 1);
3111 decorrelate(s, &s->mv_band[0], s->mv_band[0].buf, s->mv_band[0].stride, 0, 1);
3112 decorrelate(s, &s->mv_band[1], s->mv_band[1].buf, s->mv_band[1].stride, 0 ,1);
3113 encode_subband(s, &s->mb_band , s->mb_band .buf, NULL, s->mb_band .stride, 0);
3114 encode_subband(s, &s->mv_band[0], s->mv_band[0].buf, NULL, s->mv_band[0].stride, 0);
3115 encode_subband(s, &s->mv_band[1], s->mv_band[1].buf, NULL, s->mv_band[1].stride, 0);
3116
3117//FIXME avoid this
3118 correlate(s, &s->mb_band , s->mb_band .buf, s->mb_band .stride, 1, 1);
3119 correlate(s, &s->mv_band[0], s->mv_band[0].buf, s->mv_band[0].stride, 1, 1);
3120 correlate(s, &s->mv_band[1], s->mv_band[1].buf, s->mv_band[1].stride, 1, 1);
3121
3122 for(plane_index=0; plane_index<3; plane_index++){
3123 Plane *p= &s->plane[plane_index];
3124 int w= p->width;
3125 int h= p->height;
3126 int x, y;
3127 int bits= put_bits_count(&s->c.pb);
3128
3129 //FIXME optimize
3130#if QPRED
3131 memset(s->pred_buffer, 0, sizeof(DWTELEM)*w*h);
3132 predict_plane(s, s->pred_buffer, plane_index, 1);
3133 spatial_dwt(s, s->pred_buffer, w, h, w);
3134 for(level=0; level<s->spatial_decomposition_count; level++){
3135 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3136 SubBand *b= &p->band[level][orientation];
3137 int delta= ((int)s->pred_buffer - (int)s->spatial_dwt_buffer)/sizeof(DWTELEM);
3138
3139 quantize (s, b, b->buf + delta, b->stride, s->qbias);
3140 dequantize(s, b, b->buf + delta, b->stride);
3141 }
3142 }
3143 for(y=0; y<h; y++){
3144 for(x=0; x<w; x++){
3145 s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<8;
3146 }
3147 }
3148 spatial_dwt(s, s->spatial_dwt_buffer, w, h, w);
3149 for(y=0; y<h; y++){
3150 for(x=0; x<w; x++){
3151 s->spatial_dwt_buffer[y*w + x]-= s->pred_buffer[y*w + x];
3152 }
3153 }
3154#else
3155 if(pict->data[plane_index]) //FIXME gray hack
3156 for(y=0; y<h; y++){
3157 for(x=0; x<w; x++){
3158 s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<8;
3159 }
3160 }
3161 predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
3162 spatial_dwt(s, s->spatial_dwt_buffer, w, h, w);
3163#endif
3164
3165 for(level=0; level<s->spatial_decomposition_count; level++){
3166 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3167 SubBand *b= &p->band[level][orientation];
3168
3169 quantize(s, b, b->buf, b->stride, s->qbias);
3170 if(orientation==0)
3171 decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
3172 encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
3173 assert(b->parent==NULL || b->parent->stride == b->stride*2);
3174 if(orientation==0)
3175 correlate(s, b, b->buf, b->stride, 1, 0);
3176 }
3177 }
3178// av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
3179
3180 for(level=0; level<s->spatial_decomposition_count; level++){
3181 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3182 SubBand *b= &p->band[level][orientation];
3183
3184 dequantize(s, b, b->buf, b->stride);
3185 }
3186 }
3187
3188#if QPRED
3189 for(y=0; y<h; y++){
3190 for(x=0; x<w; x++){
3191 s->spatial_dwt_buffer[y*w + x]+= s->pred_buffer[y*w + x];
3192 }
3193 }
3194 spatial_idwt(s, s->spatial_dwt_buffer, w, h, w);
3195#else
3196 spatial_idwt(s, s->spatial_dwt_buffer, w, h, w);
3197 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3198#endif
3199 //FIXME optimize
3200 for(y=0; y<h; y++){
3201 for(x=0; x<w; x++){
3202 int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
3203 if(v&(~255)) v= ~(v>>31);
3204 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= v;
3205 }
3206 }
3207 if(s->avctx->flags&CODEC_FLAG_PSNR){
3208 int64_t error= 0;
3209
3210 if(pict->data[plane_index]) //FIXME gray hack
3211 for(y=0; y<h; y++){
3212 for(x=0; x<w; x++){
3213 int d= s->spatial_dwt_buffer[y*w + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x]*256;
3214 error += d*d;
3215 }
3216 }
3217 error= (error + 128*256)>>16;
3218 s->avctx->error[plane_index] += error;
3219 s->avctx->error[3] += error;
3220 }
3221 }
3222
3223 if(s->last_picture.data[0])
3224 avctx->release_buffer(avctx, &s->last_picture);
3225
3226 emms_c();
3227
3228 return put_cabac_terminate(c, 1);
3229}
3230
3231static void common_end(SnowContext *s){
3232 av_freep(&s->spatial_dwt_buffer);
3233 av_freep(&s->mb_band.buf);
3234 av_freep(&s->mv_band[0].buf);
3235 av_freep(&s->mv_band[1].buf);
3236
3237 av_freep(&s->m.me.scratchpad);
3238 av_freep(&s->m.me.map);
3239 av_freep(&s->m.me.score_map);
3240 av_freep(&s->mb_type);
3241 av_freep(&s->mb_mean);
3242 av_freep(&s->dummy);
3243 av_freep(&s->motion_val8);
3244 av_freep(&s->motion_val16);
3245}
3246
3247static int encode_end(AVCodecContext *avctx)
3248{
3249 SnowContext *s = avctx->priv_data;
3250
3251 common_end(s);
3252
3253 return 0;
3254}
3255
3256static int decode_init(AVCodecContext *avctx)
3257{
3258// SnowContext *s = avctx->priv_data;
3259
3260 common_init(avctx);
3261
3262 return 0;
3263}
3264
3265static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
3266 SnowContext *s = avctx->priv_data;
3267 CABACContext * const c= &s->c;
3268 const int width= s->avctx->width;
3269 const int height= s->avctx->height;
3270 int bytes_read;
3271 AVFrame *picture = data;
3272 int log2_threshold, level, orientation, plane_index;
3273
3274
3275 /* no supplementary picture */
3276 if (buf_size == 0)
3277 return 0;
3278
3279 ff_init_cabac_decoder(c, buf, buf_size);
3280 ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3281
3282 memset(s->header_state, 0, sizeof(s->header_state));
3283
3284 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
3285 decode_header(s);
3286
3287 frame_start(s);
3288 //keyframe flag dupliaction mess FIXME
3289 if(avctx->debug&FF_DEBUG_PICT_INFO)
3290 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
3291
3292 decode_subband(s, &s->mb_band , s->mb_band .buf, NULL, s->mb_band .stride, 0);
3293 decode_subband(s, &s->mv_band[0], s->mv_band[0].buf, NULL, s->mv_band[0].stride, 0);
3294 decode_subband(s, &s->mv_band[1], s->mv_band[1].buf, NULL, s->mv_band[1].stride, 0);
3295 correlate(s, &s->mb_band , s->mb_band .buf, s->mb_band .stride, 1, 1);
3296 correlate(s, &s->mv_band[0], s->mv_band[0].buf, s->mv_band[0].stride, 1, 1);
3297 correlate(s, &s->mv_band[1], s->mv_band[1].buf, s->mv_band[1].stride, 1, 1);
3298
3299 for(plane_index=0; plane_index<3; plane_index++){
3300 Plane *p= &s->plane[plane_index];
3301 int w= p->width;
3302 int h= p->height;
3303 int x, y;
3304
3305if(s->avctx->debug&2048){
3306 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
3307 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3308
3309 for(y=0; y<h; y++){
3310 for(x=0; x<w; x++){
3311 int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
3312 if(v&(~255)) v= ~(v>>31);
3313 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
3314 }
3315 }
3316}
3317 for(level=0; level<s->spatial_decomposition_count; level++){
3318 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3319 SubBand *b= &p->band[level][orientation];
3320
3321 decode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
3322 if(orientation==0)
3323 correlate(s, b, b->buf, b->stride, 1, 0);
3324 }
3325 }
3326if(!(s->avctx->debug&1024))
3327 for(level=0; level<s->spatial_decomposition_count; level++){
3328 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3329 SubBand *b= &p->band[level][orientation];
3330
3331 dequantize(s, b, b->buf, b->stride);
3332 }
3333 }
3334
3335#if QPRED
3336 memset(s->pred_buffer, 0, sizeof(DWTELEM)*w*h);
3337 predict_plane(s, s->pred_buffer, plane_index, 1);
3338 spatial_dwt(s, s->pred_buffer, w, h, w);
3339 for(level=0; level<s->spatial_decomposition_count; level++){
3340 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3341 SubBand *b= &p->band[level][orientation];
3342 int delta= ((int)s->pred_buffer - (int)s->spatial_dwt_buffer)/sizeof(DWTELEM);
3343
3344 quantize (s, b, b->buf + delta, b->stride, s->qbias);
3345 dequantize(s, b, b->buf + delta, b->stride);
3346 }
3347 }
3348 for(y=0; y<h; y++){
3349 for(x=0; x<w; x++){
3350 s->spatial_dwt_buffer[y*w + x]+= s->pred_buffer[y*w + x];
3351 }
3352 }
3353 spatial_idwt(s, s->spatial_dwt_buffer, w, h, w);
3354#else
3355 spatial_idwt(s, s->spatial_dwt_buffer, w, h, w);
3356 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3357#endif
3358
3359 //FIXME optimize
3360 for(y=0; y<h; y++){
3361 for(x=0; x<w; x++){
3362 int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
3363 if(v&(~255)) v= ~(v>>31);
3364 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= v;
3365 }
3366 }
3367 }
3368
3369 emms_c();
3370
3371 if(s->last_picture.data[0])
3372 avctx->release_buffer(avctx, &s->last_picture);
3373
3374if(!(s->avctx->debug&2048))
3375 *picture= s->current_picture;
3376else
3377 *picture= s->mconly_picture;
3378
3379 *data_size = sizeof(AVFrame);
3380
3381 bytes_read= get_cabac_terminate(c);
3382 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n");
3383
3384 return bytes_read;
3385}
3386
3387static int decode_end(AVCodecContext *avctx)
3388{
3389 SnowContext *s = avctx->priv_data;
3390
3391 common_end(s);
3392
3393 return 0;
3394}
3395
3396AVCodec snow_decoder = {
3397 "snow",
3398 CODEC_TYPE_VIDEO,
3399 CODEC_ID_SNOW,
3400 sizeof(SnowContext),
3401 decode_init,
3402 NULL,
3403 decode_end,
3404 decode_frame,
3405 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
3406 NULL
3407};
3408
3409AVCodec snow_encoder = {
3410 "snow",
3411 CODEC_TYPE_VIDEO,
3412 CODEC_ID_SNOW,
3413 sizeof(SnowContext),
3414 encode_init,
3415 encode_frame,
3416 encode_end,
3417};
3418
3419
/*
 * Disabled (#if 0) stand-alone self test. In order it:
 *  1. round-trips pseudo-random data through spatial_dwt()/spatial_idwt()
 *     for decomposition type 1 (reported as "5/3") and type 0 ("9/7"),
 *  2. writes a series of values with put_symbol() and reads them back with
 *     get_symbol(),
 *  3. feeds a single coefficient per subband through spatial_idwt() and
 *     prints a "visual_weight" table derived from the resulting energy,
 *  4. dumps the centre of a transformed 2x2 test pattern.
 * Every mismatch is reported with a "fsck:" line on stdout.
 */
3420#if 0
/* presumably the project headers redefine these names - TODO confirm */
3421#undef malloc
3422#undef free
3423#undef printf
3424
3425int main(){
3426 int width=256;
3427 int height=256;
 /* buffer[0] is transformed in place, buffer[1] keeps the reference copy */
3428 int buffer[2][width*height];
3429 SnowContext s;
3430 int i;
3431 s.spatial_decomposition_count=6;
3432 s.spatial_decomposition_type=1;
3433
 /* forward + inverse transform has to reproduce the input exactly */
3434 printf("testing 5/3 DWT\n");
3435 for(i=0; i<width*height; i++)
3436 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3437
3438 spatial_dwt(&s, buffer[0], width, height, width);
3439 spatial_idwt(&s, buffer[0], width, height, width);
3440
3441 for(i=0; i<width*height; i++)
3442 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3443
 /* same round trip with decomposition type 0 */
3444 printf("testing 9/7 DWT\n");
3445 s.spatial_decomposition_type=0;
3446 for(i=0; i<width*height; i++)
3447 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3448
3449 spatial_dwt(&s, buffer[0], width, height, width);
3450 spatial_idwt(&s, buffer[0], width, height, width);
3451
3452 for(i=0; i<width*height; i++)
3453 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3454
 /* entropy coder round trip; buffer[0] is reused as the bitstream buffer */
3455 printf("testing AC coder\n");
3456 memset(s.header_state, 0, sizeof(s.header_state));
3457 ff_init_cabac_encoder(&s.c, buffer[0], 256*256);
3458 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3459
 /* encode i*i*i/3*ABS(i) for i in -256..255, timing each call */
3460 for(i=-256; i<256; i++){
3461START_TIMER
3462 put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
3463STOP_TIMER("put_symbol")
3464 }
3465 put_cabac_terminate(&s.c, 1);
3466
 /* states are reset before decoding, exactly as they were before encoding */
3467 memset(s.header_state, 0, sizeof(s.header_state));
3468 ff_init_cabac_decoder(&s.c, buffer[0], 256*256);
3469 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3470
 /* decode the same series and compare against the expected values */
3471 for(i=-256; i<256; i++){
3472 int j;
3473START_TIMER
3474 j= get_symbol(&s.c, s.header_state, 1);
3475STOP_TIMER("get_symbol")
3476 if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
3477 }
3478{
3479int level, orientation, x, y;
 /* errors[level][orientation]: rounded sqrt of the energy of one impulse; g: their gcd */
3480int64_t errors[8][4];
3481int64_t g=0;
3482
3483 memset(errors, 0, sizeof(errors));
3484 s.spatial_decomposition_count=3;
3485 s.spatial_decomposition_type=0;
3486 for(level=0; level<s.spatial_decomposition_count; level++){
3487 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3488 int w= width >> (s.spatial_decomposition_count-level);
3489 int h= height >> (s.spatial_decomposition_count-level);
3490 int stride= width << (s.spatial_decomposition_count-level);
3491 DWTELEM *buf= buffer[0];
3492 int64_t error=0;
3493
 /* move buf to the start of the selected subband */
3494 if(orientation&1) buf+=w;
3495 if(orientation>1) buf+=stride>>1;
3496
 /* a single coefficient of 256*256 in an otherwise zero buffer */
3497 memset(buffer[0], 0, sizeof(int)*width*height);
3498 buf[w/2 + h/2*stride]= 256*256;
3499 spatial_idwt(&s, buffer[0], width, height, width);
 /* sum of squares of the inverse transform output; the centre is printed for level 2 */
3500 for(y=0; y<height; y++){
3501 for(x=0; x<width; x++){
3502 int64_t d= buffer[0][x + y*width];
3503 error += d*d;
3504 if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
3505 }
3506 if(ABS(height/2-y)<9 && level==2) printf("\n");
3507 }
3508 error= (int)(sqrt(error)+0.5);
3509 errors[level][orientation]= error;
3510 if(g) g=ff_gcd(g, error);
3511 else g= error;
3512 }
3513 }
 /* print the table as C source, every entry divided by the common gcd */
3514 printf("static int const visual_weight[][4]={\n");
3515 for(level=0; level<s.spatial_decomposition_count; level++){
3516 printf(" {");
3517 for(orientation=0; orientation<4; orientation++){
3518 printf("%8lld,", errors[level][orientation]/g);
3519 }
3520 printf("},\n");
3521 }
3522 printf("};\n");
3523 {
 /* these locals shadow the outer level/orientation */
3524 int level=2;
3525 int orientation=3;
3526 int w= width >> (s.spatial_decomposition_count-level);
3527 int h= height >> (s.spatial_decomposition_count-level);
3528 int stride= width << (s.spatial_decomposition_count-level);
3529 DWTELEM *buf= buffer[0];
3530 int64_t error=0;
3531
3532 buf+=w;
3533 buf+=stride>>1;
3534
3535 memset(buffer[0], 0, sizeof(int)*width*height);
 /* active branch: transform a pattern repeating every 2x2 samples; the #else branch is compiled out */
3536#if 1
3537 for(y=0; y<height; y++){
3538 for(x=0; x<width; x++){
3539 int tab[4]={0,2,3,1};
3540 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
3541 }
3542 }
3543 spatial_dwt(&s, buffer[0], width, height, width);
3544#else
3545 for(y=0; y<h; y++){
3546 for(x=0; x<w; x++){
3547 buf[x + y*stride ]=169;
3548 buf[x + y*stride-w]=64;
3549 }
3550 }
3551 spatial_idwt(&s, buffer[0], width, height, width);
3552#endif
 /* dump the centre of the result; error is accumulated but not used afterwards */
3553 for(y=0; y<height; y++){
3554 for(x=0; x<width; x++){
3555 int64_t d= buffer[0][x + y*width];
3556 error += d*d;
3557 if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
3558 }
3559 if(ABS(height/2-y)<9) printf("\n");
3560 }
3561 }
3562
3563}
3564 return 0;
3565}
3566#endif
3567