increasing precision of the quantization parameter
[libav.git] / libavcodec / snow.c
CommitLineData
791e7b83
MN
1/*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#include "avcodec.h"
20#include "common.h"
21#include "dsputil.h"
28869757
MN
22
23#include "rangecoder.h"
24#define MID_STATE 128
791e7b83
MN
25
26#include "mpegvideo.h"
27
28#undef NDEBUG
29#include <assert.h>
30
31#define MAX_DECOMPOSITIONS 8
32#define MAX_PLANES 4
33#define DWTELEM int
a0a74ad9 34#define QSHIFT 5
c97de57c 35#define QROOT (1<<QSHIFT)
93fbdb5a 36#define LOSSLESS_QLOG -128
034aff03 37#define FRAC_BITS 8
791e7b83
MN
38
39static const int8_t quant3[256]={
40 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
47 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
48-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
53-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
54-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
55-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
56};
57static const int8_t quant3b[256]={
58 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
64 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
65 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
66-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
71-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
72-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
73-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
74};
538a3841
MN
75static const int8_t quant3bA[256]={
76 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
87 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
88 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
89 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
90 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
91 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
92};
791e7b83
MN
93static const int8_t quant5[256]={
94 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
97 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
98 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
99 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
100 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
101 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
102-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
104-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
106-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
107-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
108-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
109-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
110};
111static const int8_t quant7[256]={
112 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
113 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
114 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
115 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
116 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
117 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
118 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
119 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
120-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
121-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
122-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
123-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
124-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
125-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
126-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
127-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
128};
129static const int8_t quant9[256]={
130 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
131 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
133 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
134 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
135 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
136 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
137 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
138-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
139-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
140-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
142-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
143-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
144-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
145-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
146};
147static const int8_t quant11[256]={
148 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
149 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
150 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
151 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
152 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
153 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
154 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
155 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
156-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
157-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
159-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
160-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
161-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
162-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
163-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
164};
165static const int8_t quant13[256]={
166 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
167 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
168 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
169 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
170 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
171 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
172 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
173 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
174-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
175-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
176-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
177-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
178-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
179-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
180-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
181-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
182};
183
715a97f0
MN
184#define LOG2_OBMC_MAX 6
185#define OBMC_MAX (1<<(LOG2_OBMC_MAX))
791e7b83
MN
186#if 0 //64*cubic
187static const uint8_t obmc32[1024]={
188 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
190 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
191 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
192 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
193 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
194 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
195 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
196 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
197 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
198 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
199 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
200 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
201 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
202 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
203 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
204 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
205 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
206 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
207 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
208 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
209 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
210 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
211 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
212 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
213 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
214 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
215 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
216 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
217 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
218 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
219 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
220//error:0.000022
221};
222static const uint8_t obmc16[256]={
223 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
224 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
225 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
226 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
227 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
228 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
229 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
230 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
231 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
232 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
233 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
234 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
235 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
236 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
237 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
238 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
239//error:0.000033
240};
241#elif 1 // 64*linear
242static const uint8_t obmc32[1024]={
243 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
244 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
245 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
246 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
247 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
248 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
249 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
250 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
251 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
252 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
253 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
254 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
255 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
256 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
257 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
258 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
259 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
260 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
261 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
262 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
263 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
264 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
265 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
266 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
267 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
268 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
269 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
270 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
271 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
272 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
273 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
274 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
275 //error:0.000020
276};
277static const uint8_t obmc16[256]={
278 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
279 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
280 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
281 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
282 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
283 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
284 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
285 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
286 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
287 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
288 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
289 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
290 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
291 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
292 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
293 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
294//error:0.000015
295};
296#else //64*cos
297static const uint8_t obmc32[1024]={
298 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
299 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
300 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
301 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
302 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
303 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
304 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
305 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
306 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
307 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
308 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
309 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
310 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
311 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
312 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
313 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
314 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
315 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
316 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
317 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
318 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
319 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
320 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
321 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
322 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
323 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
324 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
325 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
326 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
327 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
328 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
329 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
330//error:0.000022
331};
332static const uint8_t obmc16[256]={
333 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
334 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
335 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
336 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
337 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
338 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
339 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
340 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
341 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
342 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
343 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
344 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
345 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
346 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
347 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
348 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
349//error:0.000022
350};
351#endif
352
155ec6ed
MN
353//linear *64
354static const uint8_t obmc8[64]={
355 1, 3, 5, 7, 7, 5, 3, 1,
356 3, 9,15,21,21,15, 9, 3,
357 5,15,25,35,35,25,15, 5,
358 7,21,35,49,49,35,21, 7,
359 7,21,35,49,49,35,21, 7,
360 5,15,25,35,35,25,15, 5,
361 3, 9,15,21,21,15, 9, 3,
362 1, 3, 5, 7, 7, 5, 3, 1,
363//error:0.000000
364};
365
366//linear *64
367static const uint8_t obmc4[16]={
368 4,12,12, 4,
36912,36,36,12,
37012,36,36,12,
371 4,12,12, 4,
372//error:0.000000
373};
374
375static const uint8_t *obmc_tab[4]={
376 obmc32, obmc16, obmc8, obmc4
377};
378
/**
 * Per-block record: a motion vector, three colour bytes, a type bitmask
 * and a level.
 */
typedef struct BlockNode{
    int16_t mx;        ///< motion vector, x component
    int16_t my;        ///< motion vector, y component
    uint8_t color[3];  ///< one byte per colour component
    uint8_t type;      ///< bitmask, see BLOCK_INTRA
//#define TYPE_SPLIT   1
#define BLOCK_INTRA 1
//#define TYPE_NOCOLOR 4
    uint8_t level;     //FIXME merge into type?
}BlockNode;
389
390#define LOG2_MB_SIZE 4
391#define MB_SIZE (1<<LOG2_MB_SIZE)
392
a0d1931c
Y
/** Pairs an x value with an unsigned 16 bit coefficient. */
typedef struct x_and_coeff{
    int16_t  x;      ///< NOTE(review): presumably a horizontal position - confirm against users
    uint16_t coeff;
} x_and_coeff;
397
791e7b83
MN
398typedef struct SubBand{
399 int level;
400 int stride;
401 int width;
402 int height;
403 int qlog; ///< log(qscale)/log[2^(1/6)]
404 DWTELEM *buf;
a0d1931c
Y
405 int buf_x_offset;
406 int buf_y_offset;
407 int stride_line; ///< Stride measured in lines, not pixels.
408 x_and_coeff * x_coeff;
791e7b83
MN
409 struct SubBand *parent;
410 uint8_t state[/*7*2*/ 7 + 512][32];
411}SubBand;
412
413typedef struct Plane{
414 int width;
415 int height;
416 SubBand band[MAX_DECOMPOSITIONS][4];
417}Plane;
418
a0d1931c
Y
419/** Used to minimize the amount of memory used in order to optimize cache performance. **/
420typedef struct {
421 DWTELEM * * line; ///< For use by idwt and predict_slices.
422 DWTELEM * * data_stack; ///< Used for internal purposes.
423 int data_stack_top;
424 int line_count;
425 int line_width;
426 int data_count;
427 DWTELEM * base_buffer; ///< Buffer that this structure is caching.
428} slice_buffer;
429
791e7b83
MN
430typedef struct SnowContext{
431// MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
432
433 AVCodecContext *avctx;
28869757 434 RangeCoder c;
791e7b83
MN
435 DSPContext dsp;
436 AVFrame input_picture;
437 AVFrame current_picture;
438 AVFrame last_picture;
439 AVFrame mconly_picture;
440// uint8_t q_context[16];
441 uint8_t header_state[32];
155ec6ed 442 uint8_t block_state[128 + 32*128];
791e7b83 443 int keyframe;
19aa028d 444 int always_reset;
791e7b83
MN
445 int version;
446 int spatial_decomposition_type;
447 int temporal_decomposition_type;
448 int spatial_decomposition_count;
449 int temporal_decomposition_count;
450 DWTELEM *spatial_dwt_buffer;
791e7b83
MN
451 int colorspace_type;
452 int chroma_h_shift;
453 int chroma_v_shift;
454 int spatial_scalability;
455 int qlog;
155ec6ed
MN
456 int lambda;
457 int lambda2;
791e7b83
MN
458 int mv_scale;
459 int qbias;
460#define QBIAS_SHIFT 3
155ec6ed
MN
461 int b_width;
462 int b_height;
463 int block_max_depth;
791e7b83 464 Plane plane[MAX_PLANES];
155ec6ed 465 BlockNode *block;
a0d1931c 466 slice_buffer sb;
155ec6ed 467
791e7b83
MN
468 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
469}SnowContext;
470
f9e6ebf7
LM
471typedef struct {
472 DWTELEM *b0;
473 DWTELEM *b1;
474 DWTELEM *b2;
475 DWTELEM *b3;
476 int y;
477} dwt_compose_t;
478
a0d1931c
Y
479#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
480//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
481
482static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
483{
484 int i;
485
486 buf->base_buffer = base_buffer;
487 buf->line_count = line_count;
488 buf->line_width = line_width;
489 buf->data_count = max_allocated_lines;
490 buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
491 buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
492
493 for (i = 0; i < max_allocated_lines; i++)
494 {
495 buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
496 }
497
498 buf->data_stack_top = max_allocated_lines - 1;
499}
500
501static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
502{
503 int i;
504 int offset;
505 DWTELEM * buffer;
506
507// av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
508
509 assert(buf->data_stack_top >= 0);
510// assert(!buf->line[line]);
511 if (buf->line[line])
512 return buf->line[line];
513
514 offset = buf->line_width * line;
515 buffer = buf->data_stack[buf->data_stack_top];
516 buf->data_stack_top--;
517 buf->line[line] = buffer;
518
519// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
520
521 return buffer;
522}
523
524static void slice_buffer_release(slice_buffer * buf, int line)
525{
526 int i;
527 int offset;
528 DWTELEM * buffer;
529
530 assert(line >= 0 && line < buf->line_count);
531 assert(buf->line[line]);
532
533 offset = buf->line_width * line;
534 buffer = buf->line[line];
535 buf->data_stack_top++;
536 buf->data_stack[buf->data_stack_top] = buffer;
537 buf->line[line] = NULL;
538
539// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
540}
541
542static void slice_buffer_flush(slice_buffer * buf)
543{
544 int i;
545 for (i = 0; i < buf->line_count; i++)
546 {
547 if (buf->line[i])
548 {
549// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
550 slice_buffer_release(buf, i);
551 }
552 }
553}
554
555static void slice_buffer_destroy(slice_buffer * buf)
556{
557 int i;
558 slice_buffer_flush(buf);
559
560 for (i = buf->data_count - 1; i >= 0; i--)
561 {
562 assert(buf->data_stack[i]);
563 av_free(buf->data_stack[i]);
564 }
565 assert(buf->data_stack);
566 av_free(buf->data_stack);
567 assert(buf->line);
568 av_free(buf->line);
569}
570
2554db9b
MN
571#ifdef __sgi
572// Avoid a name clash on SGI IRIX
573#undef qexp
574#endif
034aff03 575#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
c97de57c 576static uint8_t qexp[QROOT];
791e7b83
MN
577
/**
 * Reflects v back towards the range 0..m: values below 0 are reflected
 * at 0, values above m are reflected at m. Only one reflection is applied.
 */
static inline int mirror(int v, int m){
    if(v >= 0 && v <= m)
        return v;
    return v < 0 ? -v : 2*m-v;
}
583
28869757 584static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
791e7b83
MN
585 int i;
586
587 if(v){
588 const int a= ABS(v);
589 const int e= av_log2(a);
590#if 1
591 const int el= FFMIN(e, 10);
28869757 592 put_rac(c, state+0, 0);
791e7b83
MN
593
594 for(i=0; i<el; i++){
28869757 595 put_rac(c, state+1+i, 1); //1..10
791e7b83
MN
596 }
597 for(; i<e; i++){
28869757 598 put_rac(c, state+1+9, 1); //1..10
791e7b83 599 }
28869757 600 put_rac(c, state+1+FFMIN(i,9), 0);
791e7b83
MN
601
602 for(i=e-1; i>=el; i--){
28869757 603 put_rac(c, state+22+9, (a>>i)&1); //22..31
791e7b83
MN
604 }
605 for(; i>=0; i--){
28869757 606 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
607 }
608
609 if(is_signed)
28869757 610 put_rac(c, state+11 + el, v < 0); //11..21
791e7b83
MN
611#else
612
28869757 613 put_rac(c, state+0, 0);
791e7b83
MN
614 if(e<=9){
615 for(i=0; i<e; i++){
28869757 616 put_rac(c, state+1+i, 1); //1..10
791e7b83 617 }
28869757 618 put_rac(c, state+1+i, 0);
791e7b83
MN
619
620 for(i=e-1; i>=0; i--){
28869757 621 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
622 }
623
624 if(is_signed)
28869757 625 put_rac(c, state+11 + e, v < 0); //11..21
791e7b83
MN
626 }else{
627 for(i=0; i<e; i++){
28869757 628 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
791e7b83 629 }
28869757 630 put_rac(c, state+1+FFMIN(i,9), 0);
791e7b83
MN
631
632 for(i=e-1; i>=0; i--){
28869757 633 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
791e7b83
MN
634 }
635
636 if(is_signed)
28869757 637 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
791e7b83
MN
638 }
639#endif
640 }else{
28869757 641 put_rac(c, state+0, 1);
791e7b83
MN
642 }
643}
644
28869757
MN
645static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
646 if(get_rac(c, state+0))
791e7b83
MN
647 return 0;
648 else{
7c2425d2
LM
649 int i, e, a;
650 e= 0;
28869757 651 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
7c2425d2 652 e++;
791e7b83 653 }
7c2425d2 654
791e7b83 655 a= 1;
7c2425d2 656 for(i=e-1; i>=0; i--){
28869757 657 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
791e7b83
MN
658 }
659
28869757 660 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
791e7b83
MN
661 return -a;
662 else
663 return a;
664 }
665}
666
28869757 667static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
4f4e9633 668 int i;
0635cbfc 669 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
670
671 assert(v>=0);
0635cbfc
MN
672 assert(log2>=-4);
673
674 while(v >= r){
28869757 675 put_rac(c, state+4+log2, 1);
0635cbfc 676 v -= r;
4f4e9633 677 log2++;
0635cbfc 678 if(log2>0) r+=r;
4f4e9633 679 }
28869757 680 put_rac(c, state+4+log2, 0);
4f4e9633
MN
681
682 for(i=log2-1; i>=0; i--){
28869757 683 put_rac(c, state+31-i, (v>>i)&1);
4f4e9633 684 }
4f4e9633
MN
685}
686
28869757 687static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
4f4e9633 688 int i;
0635cbfc 689 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
690 int v=0;
691
0635cbfc
MN
692 assert(log2>=-4);
693
28869757 694 while(get_rac(c, state+4+log2)){
0635cbfc 695 v+= r;
4f4e9633 696 log2++;
0635cbfc 697 if(log2>0) r+=r;
4f4e9633
MN
698 }
699
700 for(i=log2-1; i>=0; i--){
28869757 701 v+= get_rac(c, state+31-i)<<i;
4f4e9633
MN
702 }
703
704 return v;
705}
706
791e7b83
MN
707static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
708 const int mirror_left= !highpass;
709 const int mirror_right= (width&1) ^ highpass;
710 const int w= (width>>1) - 1 + (highpass & width);
711 int i;
712
713#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
714 if(mirror_left){
715 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
716 dst += dst_step;
717 src += src_step;
718 }
719
720 for(i=0; i<w; i++){
721 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
722 }
723
724 if(mirror_right){
725 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
726 }
727}
728
729static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
730 const int mirror_left= !highpass;
731 const int mirror_right= (width&1) ^ highpass;
732 const int w= (width>>1) - 1 + (highpass & width);
733 int i;
734
735 if(mirror_left){
736 int r= 3*2*ref[0];
737 r += r>>4;
738 r += r>>8;
739 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
740 dst += dst_step;
741 src += src_step;
742 }
743
744 for(i=0; i<w; i++){
745 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
746 r += r>>4;
747 r += r>>8;
748 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
749 }
750
751 if(mirror_right){
752 int r= 3*2*ref[w*ref_step];
753 r += r>>4;
754 r += r>>8;
755 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
756 }
757}
758
759
aa25a462 760static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
791e7b83
MN
761 int x, i;
762
763 for(x=start; x<width; x+=2){
764 int64_t sum=0;
765
766 for(i=0; i<n; i++){
767 int x2= x + 2*i - n + 1;
768 if (x2< 0) x2= -x2;
769 else if(x2>=width) x2= 2*width-x2-2;
770 sum += coeffs[i]*(int64_t)dst[x2];
771 }
772 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
773 else dst[x] += (sum + (1<<shift)/2)>>shift;
774 }
775}
776
aa25a462 777static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
791e7b83
MN
778 int x, y, i;
779 for(y=start; y<height; y+=2){
780 for(x=0; x<width; x++){
781 int64_t sum=0;
782
783 for(i=0; i<n; i++){
784 int y2= y + 2*i - n + 1;
785 if (y2< 0) y2= -y2;
786 else if(y2>=height) y2= 2*height-y2-2;
787 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
788 }
789 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
790 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
791 }
792 }
793}
794
/*
 * Configuration of the generic "X" wavelet (horizontal_decomposeX() and
 * friends). The transform is four lifting steps; step k passes COEFFSk
 * (Nk taps) and SHIFTk to inplace_lift()/inplace_liftV(). Steps 1 and 3
 * start at sample LX1, steps 2 and 4 at sample LX0. SCALEX is multiplied
 * into every sample before decomposition and divided out after composition.
 *
 * Only the first #if/#elif branch whose condition is nonzero is compiled;
 * the remaining branches are alternative filters kept for reference.
 * NOTE(review): the 3/5, 11/5 and 9/7 CDF branches define LX0/LX1/SCALEX
 * again without an #undef -- check before enabling one of them.
 */
795#define SCALEX 1
796#define LX0 0
797#define LX1 1
798
de890c9b 799#if 0 // more accurate 9/7
791e7b83
MN
800#define N1 2
801#define SHIFT1 14
802#define COEFFS1 (int[]){-25987,-25987}
803#define N2 2
804#define SHIFT2 19
805#define COEFFS2 (int[]){-27777,-27777}
806#define N3 2
807#define SHIFT3 15
808#define COEFFS3 (int[]){28931,28931}
809#define N4 2
810#define SHIFT4 15
811#define COEFFS4 (int[]){14533,14533}
/* first branch with a nonzero condition: this is the filter that gets compiled */
812#elif 1 // 13/7 CRF
813#define N1 4
814#define SHIFT1 4
815#define COEFFS1 (int[]){1,-9,-9,1}
de890c9b 816#define N2 4
791e7b83
MN
817#define SHIFT2 4
818#define COEFFS2 (int[]){-1,5,5,-1}
/* steps 3 and 4 have no taps (N=0) */
819#define N3 0
820#define SHIFT3 1
821#define COEFFS3 NULL
822#define N4 0
823#define SHIFT4 1
824#define COEFFS4 NULL
825#elif 1 // 3/5
826#define LX0 1
827#define LX1 0
828#define SCALEX 0.5
829#define N1 2
830#define SHIFT1 1
831#define COEFFS1 (int[]){1,1}
832#define N2 2
833#define SHIFT2 2
834#define COEFFS2 (int[]){-1,-1}
835#define N3 0
836#define SHIFT3 0
837#define COEFFS3 NULL
838#define N4 0
839#define SHIFT4 0
840#define COEFFS4 NULL
841#elif 1 // 11/5
842#define N1 0
843#define SHIFT1 1
844#define COEFFS1 NULL
845#define N2 2
846#define SHIFT2 2
847#define COEFFS2 (int[]){-1,-1}
848#define N3 2
849#define SHIFT3 0
850#define COEFFS3 (int[]){-1,-1}
851#define N4 4
852#define SHIFT4 7
853#define COEFFS4 (int[]){-5,29,29,-5}
854#define SCALEX 4
855#elif 1 // 9/7 CDF
856#define N1 2
857#define SHIFT1 7
858#define COEFFS1 (int[]){-203,-203}
859#define N2 2
860#define SHIFT2 12
861#define COEFFS2 (int[]){-217,-217}
862#define N3 2
863#define SHIFT3 7
864#define COEFFS3 (int[]){113,113}
865#define N4 2
866#define SHIFT4 9
867#define COEFFS4 (int[]){227,227}
868#define SCALEX 1
869#elif 1 // 7/5 CDF
870#define N1 0
871#define SHIFT1 1
872#define COEFFS1 NULL
873#define N2 2
874#define SHIFT2 2
875#define COEFFS2 (int[]){-1,-1}
876#define N3 2
877#define SHIFT3 0
878#define COEFFS3 (int[]){-1,-1}
879#define N4 2
880#define SHIFT4 4
881#define COEFFS4 (int[]){3,3}
882#elif 1 // 9/7 MN
883#define N1 4
884#define SHIFT1 4
885#define COEFFS1 (int[]){1,-9,-9,1}
886#define N2 2
887#define SHIFT2 2
888#define COEFFS2 (int[]){1,1}
889#define N3 0
890#define SHIFT3 1
891#define COEFFS3 NULL
892#define N4 0
893#define SHIFT4 1
894#define COEFFS4 NULL
895#else // 13/7 CRF
896#define N1 4
897#define SHIFT1 4
898#define COEFFS1 (int[]){1,-9,-9,1}
899#define N2 4
900#define SHIFT2 4
901#define COEFFS2 (int[]){-1,5,5,-1}
902#define N3 0
903#define SHIFT3 1
904#define COEFFS3 NULL
905#define N4 0
906#define SHIFT4 1
907#define COEFFS4 NULL
908#endif
aa25a462
RFI
909static void horizontal_decomposeX(DWTELEM *b, int width){
910 DWTELEM temp[width];
791e7b83
MN
911 const int width2= width>>1;
912 const int w2= (width+1)>>1;
913 int A1,A2,A3,A4, x;
914
915 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
916 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
917 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
918 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
919
920 for(x=0; x<width2; x++){
921 temp[x ]= b[2*x ];
922 temp[x+w2]= b[2*x + 1];
923 }
924 if(width&1)
925 temp[x ]= b[2*x ];
926 memcpy(b, temp, width*sizeof(int));
927}
928
aa25a462
RFI
929static void horizontal_composeX(DWTELEM *b, int width){
930 DWTELEM temp[width];
791e7b83
MN
931 const int width2= width>>1;
932 int A1,A2,A3,A4, x;
933 const int w2= (width+1)>>1;
934
935 memcpy(temp, b, width*sizeof(int));
936 for(x=0; x<width2; x++){
937 b[2*x ]= temp[x ];
938 b[2*x + 1]= temp[x+w2];
939 }
940 if(width&1)
941 b[2*x ]= temp[x ];
942
943 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
944 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
945 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
946 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
947}
948
aa25a462 949static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
791e7b83
MN
950 int x, y;
951
952 for(y=0; y<height; y++){
953 for(x=0; x<width; x++){
954 buffer[y*stride + x] *= SCALEX;
955 }
956 }
957
958 for(y=0; y<height; y++){
959 horizontal_decomposeX(buffer + y*stride, width);
960 }
961
962 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
963 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
964 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
965 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
966}
967
aa25a462 968static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
791e7b83
MN
969 int x, y;
970
971 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
972 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
973 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
974 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
975
976 for(y=0; y<height; y++){
977 horizontal_composeX(buffer + y*stride, width);
978 }
979
980 for(y=0; y<height; y++){
981 for(x=0; x<width; x++){
982 buffer[y*stride + x] /= SCALEX;
983 }
984 }
985}
986
aa25a462
RFI
987static void horizontal_decompose53i(DWTELEM *b, int width){
988 DWTELEM temp[width];
791e7b83
MN
989 const int width2= width>>1;
990 int A1,A2,A3,A4, x;
991 const int w2= (width+1)>>1;
992
993 for(x=0; x<width2; x++){
994 temp[x ]= b[2*x ];
995 temp[x+w2]= b[2*x + 1];
996 }
997 if(width&1)
998 temp[x ]= b[2*x ];
999#if 0
1000 A2= temp[1 ];
1001 A4= temp[0 ];
1002 A1= temp[0+width2];
1003 A1 -= (A2 + A4)>>1;
1004 A4 += (A1 + 1)>>1;
1005 b[0+width2] = A1;
1006 b[0 ] = A4;
1007 for(x=1; x+1<width2; x+=2){
1008 A3= temp[x+width2];
1009 A4= temp[x+1 ];
1010 A3 -= (A2 + A4)>>1;
1011 A2 += (A1 + A3 + 2)>>2;
1012 b[x+width2] = A3;
1013 b[x ] = A2;
1014
1015 A1= temp[x+1+width2];
1016 A2= temp[x+2 ];
1017 A1 -= (A2 + A4)>>1;
1018 A4 += (A1 + A3 + 2)>>2;
1019 b[x+1+width2] = A1;
1020 b[x+1 ] = A4;
1021 }
1022 A3= temp[width-1];
1023 A3 -= A2;
1024 A2 += (A1 + A3 + 2)>>2;
1025 b[width -1] = A3;
1026 b[width2-1] = A2;
1027#else
1028 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1029 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
1030#endif
1031}
1032
aa25a462 1033static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1034 int i;
1035
1036 for(i=0; i<width; i++){
1037 b1[i] -= (b0[i] + b2[i])>>1;
1038 }
1039}
1040
aa25a462 1041static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1042 int i;
1043
1044 for(i=0; i<width; i++){
1045 b1[i] += (b0[i] + b2[i] + 2)>>2;
1046 }
1047}
1048
aa25a462 1049static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 1050 int y;
791e7b83
MN
1051 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1052 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
1053
1054 for(y=-2; y<height; y+=2){
1055 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1056 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1057
1058{START_TIMER
1059 if(b1 <= b3) horizontal_decompose53i(b2, width);
1060 if(y+2 < height) horizontal_decompose53i(b3, width);
1061STOP_TIMER("horizontal_decompose53i")}
1062
1063{START_TIMER
1064 if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
1065 if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
1066STOP_TIMER("vertical_decompose53i*")}
1067
1068 b0=b2;
1069 b1=b3;
1070 }
1071}
1072
/*
 * Coefficients of the four lifting steps (A, B, C, D) of the integer 9/7
 * transform. For step X the code computes (W_XM*(a + b) + W_XO) >> W_XS,
 * i.e. M is the multiplier, O the rounding offset and S the shift.
 *
 * lift5 is first aliased to lift(); the active "#if 1" set removes the
 * alias again, so step C goes through the real lift5() function and through
 * the "#else" shift-and-add paths in vertical_*97iH1(). lift5() ignores its
 * mul argument, so W_CM is not used for the result in that configuration.
 */
1073#define lift5 lift
1074#if 1
1075#define W_AM 3
1076#define W_AO 0
1077#define W_AS 1
1078
1079#define W_BM 1
1080#define W_BO 8
1081#define W_BS 4
1082
1083#undef lift5
1084#define W_CM 9999
1085#define W_CO 2
1086#define W_CS 2
1087
1088#define W_DM 15
1089#define W_DO 16
1090#define W_DS 5
/* the sets below are disabled alternatives */
1091#elif 0
1092#define W_AM 55
1093#define W_AO 16
1094#define W_AS 5
1095
1096#define W_BM 3
1097#define W_BO 32
1098#define W_BS 6
1099
1100#define W_CM 127
1101#define W_CO 64
1102#define W_CS 7
1103
1104#define W_DM 7
1105#define W_DO 8
1106#define W_DS 4
1107#elif 0
1108#define W_AM 97
1109#define W_AO 32
1110#define W_AS 6
1111
1112#define W_BM 63
1113#define W_BO 512
1114#define W_BS 10
1115
1116#define W_CM 13
1117#define W_CO 8
1118#define W_CS 4
1119
1120#define W_DM 15
1121#define W_DO 16
1122#define W_DS 5
1123
1124#else
1125
1126#define W_AM 203
1127#define W_AO 64
1128#define W_AS 7
1129
1130#define W_BM 217
1131#define W_BO 2048
1132#define W_BS 12
1133
1134#define W_CM 113
1135#define W_CO 64
1136#define W_CS 7
1137
1138#define W_DM 227
1139#define W_DO 128
1140#define W_DS 9
1141#endif
aa25a462
RFI
1142static void horizontal_decompose97i(DWTELEM *b, int width){
1143 DWTELEM temp[width];
791e7b83
MN
1144 const int w2= (width+1)>>1;
1145
1146 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1147 lift (temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1148 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1149 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
1150}
1151
1152
aa25a462 1153static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1154 int i;
1155
1156 for(i=0; i<width; i++){
1157 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1158 }
1159}
1160
aa25a462 1161static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1162 int i;
1163
1164 for(i=0; i<width; i++){
1165#ifdef lift5
1166 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1167#else
1168 int r= 3*(b0[i] + b2[i]);
1169 r+= r>>4;
1170 r+= r>>8;
1171 b1[i] += (r+W_CO)>>W_CS;
1172#endif
1173 }
1174}
1175
aa25a462 1176static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1177 int i;
1178
1179 for(i=0; i<width; i++){
1180 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1181 }
1182}
1183
aa25a462 1184static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1185 int i;
1186
1187 for(i=0; i<width; i++){
1188 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1189 }
1190}
1191
aa25a462 1192static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 1193 int y;
791e7b83
MN
1194 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1195 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1196 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1197 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1198
1199 for(y=-4; y<height; y+=2){
1200 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1201 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1202
1203{START_TIMER
1204 if(b3 <= b5) horizontal_decompose97i(b4, width);
1205 if(y+4 < height) horizontal_decompose97i(b5, width);
1206if(width>400){
1207STOP_TIMER("horizontal_decompose97i")
1208}}
1209
1210{START_TIMER
1211 if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1212 if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1213 if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1214 if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1215
1216if(width>400){
1217STOP_TIMER("vertical_decompose97i")
1218}}
1219
1220 b0=b2;
1221 b1=b3;
1222 b2=b4;
1223 b3=b5;
1224 }
1225}
1226
aa25a462 1227void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
791e7b83
MN
1228 int level;
1229
46c281e8
MN
1230 for(level=0; level<decomposition_count; level++){
1231 switch(type){
791e7b83
MN
1232 case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1233 case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1234 case 2: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
1235 }
1236 }
1237}
1238
aa25a462
RFI
1239static void horizontal_compose53i(DWTELEM *b, int width){
1240 DWTELEM temp[width];
791e7b83
MN
1241 const int width2= width>>1;
1242 const int w2= (width+1)>>1;
1243 int A1,A2,A3,A4, x;
1244
1245#if 0
1246 A2= temp[1 ];
1247 A4= temp[0 ];
1248 A1= temp[0+width2];
1249 A1 -= (A2 + A4)>>1;
1250 A4 += (A1 + 1)>>1;
1251 b[0+width2] = A1;
1252 b[0 ] = A4;
1253 for(x=1; x+1<width2; x+=2){
1254 A3= temp[x+width2];
1255 A4= temp[x+1 ];
1256 A3 -= (A2 + A4)>>1;
1257 A2 += (A1 + A3 + 2)>>2;
1258 b[x+width2] = A3;
1259 b[x ] = A2;
1260
1261 A1= temp[x+1+width2];
1262 A2= temp[x+2 ];
1263 A1 -= (A2 + A4)>>1;
1264 A4 += (A1 + A3 + 2)>>2;
1265 b[x+1+width2] = A1;
1266 b[x+1 ] = A4;
1267 }
1268 A3= temp[width-1];
1269 A3 -= A2;
1270 A2 += (A1 + A3 + 2)>>2;
1271 b[width -1] = A3;
1272 b[width2-1] = A2;
1273#else
1274 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1275 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1276#endif
1277 for(x=0; x<width2; x++){
1278 b[2*x ]= temp[x ];
1279 b[2*x + 1]= temp[x+w2];
1280 }
1281 if(width&1)
1282 b[2*x ]= temp[x ];
1283}
1284
aa25a462 1285static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1286 int i;
1287
1288 for(i=0; i<width; i++){
1289 b1[i] += (b0[i] + b2[i])>>1;
1290 }
1291}
1292
aa25a462 1293static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1294 int i;
1295
1296 for(i=0; i<width; i++){
1297 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1298 }
1299}
1300
a0d1931c
Y
1301static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1302 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1303 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1304 cs->y = -1;
1305}
1306
f9e6ebf7
LM
1307static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1308 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1309 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1310 cs->y = -1;
1311}
1312
a0d1931c
Y
1313static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1314 int y= cs->y;
1315 int mirror0 = mirror(y-1, height-1);
1316 int mirror1 = mirror(y , height-1);
1317 int mirror2 = mirror(y+1, height-1);
1318 int mirror3 = mirror(y+2, height-1);
1319
1320 DWTELEM *b0= cs->b0;
1321 DWTELEM *b1= cs->b1;
1322 DWTELEM *b2= slice_buffer_get_line(sb, mirror2 * stride_line);
1323 DWTELEM *b3= slice_buffer_get_line(sb, mirror3 * stride_line);
1324
1325{START_TIMER
1326 if(mirror1 <= mirror3) vertical_compose53iL0(b1, b2, b3, width);
1327 if(mirror0 <= mirror2) vertical_compose53iH0(b0, b1, b2, width);
1328STOP_TIMER("vertical_compose53i*")}
1329
1330{START_TIMER
1331 if(y-1 >= 0) horizontal_compose53i(b0, width);
1332 if(mirror0 <= mirror2) horizontal_compose53i(b1, width);
1333STOP_TIMER("horizontal_compose53i")}
1334
1335 cs->b0 = b2;
1336 cs->b1 = b3;
1337 cs->y += 2;
1338}
1339
f9e6ebf7
LM
1340static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1341 int y= cs->y;
1342 DWTELEM *b0= cs->b0;
1343 DWTELEM *b1= cs->b1;
1344 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1345 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
791e7b83
MN
1346
1347{START_TIMER
1348 if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1349 if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1350STOP_TIMER("vertical_compose53i*")}
1351
1352{START_TIMER
1353 if(y-1 >= 0) horizontal_compose53i(b0, width);
1354 if(b0 <= b2) horizontal_compose53i(b1, width);
1355STOP_TIMER("horizontal_compose53i")}
1356
f9e6ebf7
LM
1357 cs->b0 = b2;
1358 cs->b1 = b3;
1359 cs->y += 2;
1360}
1361
1362static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1363 dwt_compose_t cs;
1364 spatial_compose53i_init(&cs, buffer, height, stride);
1365 while(cs.y <= height)
1366 spatial_compose53i_dy(&cs, buffer, width, height, stride);
791e7b83
MN
1367}
1368
1369
aa25a462
RFI
1370static void horizontal_compose97i(DWTELEM *b, int width){
1371 DWTELEM temp[width];
791e7b83
MN
1372 const int w2= (width+1)>>1;
1373
1374 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1375 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1376 lift (b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1377 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1378}
1379
aa25a462 1380static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1381 int i;
1382
1383 for(i=0; i<width; i++){
1384 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1385 }
1386}
1387
aa25a462 1388static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1389 int i;
1390
1391 for(i=0; i<width; i++){
1392#ifdef lift5
1393 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1394#else
1395 int r= 3*(b0[i] + b2[i]);
1396 r+= r>>4;
1397 r+= r>>8;
1398 b1[i] -= (r+W_CO)>>W_CS;
1399#endif
1400 }
1401}
1402
aa25a462 1403static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1404 int i;
1405
1406 for(i=0; i<width; i++){
1407 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1408 }
1409}
1410
aa25a462 1411static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1412 int i;
1413
1414 for(i=0; i<width; i++){
1415 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1416 }
1417}
1418
565a45ac
MN
1419static void vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1420 int i;
1421
1422 for(i=0; i<width; i++){
1423 int r;
1424 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1425#ifdef lift5
1426 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1427#else
1428 r= 3*(b2[i] + b4[i]);
1429 r+= r>>4;
1430 r+= r>>8;
1431 b3[i] -= (r+W_CO)>>W_CS;
1432#endif
1433 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1434 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1435 }
1436}
1437
a0d1931c
Y
1438static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1439 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1440 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1441 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1442 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1443 cs->y = -3;
1444}
1445
f9e6ebf7
LM
1446static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1447 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1448 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1449 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1450 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1451 cs->y = -3;
1452}
791e7b83 1453
a0d1931c
Y
1454static void spatial_compose97i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1455 int y = cs->y;
1456
1457 int mirror0 = mirror(y - 1, height - 1);
1458 int mirror1 = mirror(y + 0, height - 1);
1459 int mirror2 = mirror(y + 1, height - 1);
1460 int mirror3 = mirror(y + 2, height - 1);
1461 int mirror4 = mirror(y + 3, height - 1);
1462 int mirror5 = mirror(y + 4, height - 1);
1463 DWTELEM *b0= cs->b0;
1464 DWTELEM *b1= cs->b1;
1465 DWTELEM *b2= cs->b2;
1466 DWTELEM *b3= cs->b3;
1467 DWTELEM *b4= slice_buffer_get_line(sb, mirror4 * stride_line);
1468 DWTELEM *b5= slice_buffer_get_line(sb, mirror5 * stride_line);
1469
a0d1931c 1470{START_TIMER
565a45ac
MN
1471 if(y>0 && y+4<height){
1472 vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1473 }else{
1474 if(mirror3 <= mirror5) vertical_compose97iL1(b3, b4, b5, width);
1475 if(mirror2 <= mirror4) vertical_compose97iH1(b2, b3, b4, width);
1476 if(mirror1 <= mirror3) vertical_compose97iL0(b1, b2, b3, width);
1477 if(mirror0 <= mirror2) vertical_compose97iH0(b0, b1, b2, width);
1478 }
a0d1931c
Y
1479if(width>400){
1480STOP_TIMER("vertical_compose97i")}}
a0d1931c
Y
1481
1482{START_TIMER
1483 if(y-1>= 0) horizontal_compose97i(b0, width);
1484 if(mirror0 <= mirror2) horizontal_compose97i(b1, width);
1485if(width>400 && mirror0 <= mirror2){
1486STOP_TIMER("horizontal_compose97i")}}
1487
1488 cs->b0=b2;
1489 cs->b1=b3;
1490 cs->b2=b4;
1491 cs->b3=b5;
1492 cs->y += 2;
1493}
1494
f9e6ebf7
LM
1495static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1496 int y = cs->y;
1497 DWTELEM *b0= cs->b0;
1498 DWTELEM *b1= cs->b1;
1499 DWTELEM *b2= cs->b2;
1500 DWTELEM *b3= cs->b3;
1501 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1502 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
791e7b83
MN
1503
1504 if(stride == width && y+4 < height && 0){
1505 int x;
1506 for(x=0; x<width/2; x++)
1507 b5[x] += 64*2;
1508 for(; x<width; x++)
1509 b5[x] += 169*2;
1510 }
1511
1512{START_TIMER
1513 if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1514 if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1515 if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1516 if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1517if(width>400){
1518STOP_TIMER("vertical_compose97i")}}
1519
1520{START_TIMER
1521 if(y-1>= 0) horizontal_compose97i(b0, width);
1522 if(b0 <= b2) horizontal_compose97i(b1, width);
1523if(width>400 && b0 <= b2){
1524STOP_TIMER("horizontal_compose97i")}}
f9e6ebf7
LM
1525
1526 cs->b0=b2;
1527 cs->b1=b3;
1528 cs->b2=b4;
1529 cs->b3=b5;
1530 cs->y += 2;
1531}
1532
1533static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1534 dwt_compose_t cs;
1535 spatial_compose97i_init(&cs, buffer, height, stride);
1536 while(cs.y <= height)
1537 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1538}
1539
a0d1931c
Y
1540void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1541 int level;
1542 for(level=decomposition_count-1; level>=0; level--){
1543 switch(type){
1544 case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1545 case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1546 /* not slicified yet */
1547 case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1548 av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
1549 }
1550 }
1551}
1552
f9e6ebf7
LM
1553void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1554 int level;
1555 for(level=decomposition_count-1; level>=0; level--){
1556 switch(type){
1557 case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1558 case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1559 /* not slicified yet */
1560 case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
1561 }
791e7b83
MN
1562 }
1563}
1564
f9e6ebf7
LM
1565void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1566 const int support = type==1 ? 3 : 5;
791e7b83 1567 int level;
f9e6ebf7 1568 if(type==2) return;
791e7b83 1569
46c281e8 1570 for(level=decomposition_count-1; level>=0; level--){
f9e6ebf7
LM
1571 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1572 switch(type){
1573 case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1574 break;
1575 case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1576 break;
1577 case 2: break;
1578 }
791e7b83
MN
1579 }
1580 }
1581}
1582
a0d1931c
Y
1583void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1584 const int support = type==1 ? 3 : 5;
1585 int level;
1586 if(type==2) return;
1587
1588 for(level=decomposition_count-1; level>=0; level--){
1589 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1590 switch(type){
1591 case 0: spatial_compose97i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1592 break;
1593 case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1594 break;
1595 case 2: break;
1596 }
1597 }
1598 }
1599}
1600
f9e6ebf7
LM
1601void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1602 if(type==2){
1603 int level;
1604 for(level=decomposition_count-1; level>=0; level--)
1605 spatial_composeX (buffer, width>>level, height>>level, stride<<level);
1606 }else{
1607 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1608 int y;
1609 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1610 for(y=0; y<height; y+=4)
1611 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1612 }
1613}
1614
/**
 * Encodes the coefficients of one subband with zero-run and context coding.
 *
 * Pass 1 scans the band in raster order and, for positions whose left,
 * top-left, top, top-right and parent coefficients are all zero, records
 * the lengths of the runs of zero coefficients in runs[].
 * Pass 2 scans again and writes, per coefficient: either a context-coded
 * "nonzero" flag (some neighbour is nonzero) or nothing while inside a run
 * (a new run length is written whenever the previous run is used up);
 * then, for nonzero coefficients, magnitude-1 and the sign.
 * Both passes must classify every position identically.
 *
 * @param src    coefficients of this band, stride samples per row
 * @param parent coefficients of the parent band, or NULL
 * @return 0 on success, -1 if fewer than w*40 bytes of output space remain
 *         at the start of a row
 */
0ecca7a4 1615static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
4f4e9633
MN
1616 const int w= b->width;
1617 const int h= b->height;
1618 int x, y;
1619
791e7b83 1620 if(1){
791e7b83 1621 int run=0;
/* NOTE(review): variable-length array of w*h ints on the stack */
a8d73e56 1622 int runs[w*h];
791e7b83
MN
1623 int run_index=0;
1624
/* pass 1: collect the zero-run lengths */
791e7b83
MN
1625 for(y=0; y<h; y++){
1626 for(x=0; x<w; x++){
78486403 1627 int v, p=0;
6b2f6646 1628 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1629 v= src[x + y*stride];
791e7b83
MN
1630
1631 if(y){
a8d73e56 1632 t= src[x + (y-1)*stride];
791e7b83 1633 if(x){
a8d73e56 1634 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1635 }
1636 if(x + 1 < w){
a8d73e56 1637 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1638 }
1639 }
1640 if(x){
a8d73e56 1641 l= src[x - 1 + y*stride];
6b2f6646
MN
1642 /*if(x > 1){
1643 if(orientation==1) ll= src[y + (x-2)*stride];
1644 else ll= src[x - 2 + y*stride];
791e7b83
MN
1645 }*/
1646 }
78486403 1647 if(parent){
a8d73e56
MN
1648 int px= x>>1;
1649 int py= y>>1;
78486403
MN
1650 if(px<b->parent->width && py<b->parent->height)
1651 p= parent[px + py*2*stride];
1652 }
/* only positions with an all-zero neighbourhood take part in run coding */
1653 if(!(/*ll|*/l|lt|t|rt|p)){
791e7b83
MN
1654 if(v){
1655 runs[run_index++]= run;
1656 run=0;
1657 }else{
1658 run++;
1659 }
1660 }
1661 }
1662 }
/* trailing run, then rewind to replay the runs in pass 2 */
1663 runs[run_index++]= run;
1664 run_index=0;
1665 run= runs[run_index++];
1666
4f4e9633 1667 put_symbol2(&s->c, b->state[1], run, 3);
791e7b83
MN
1668
/* pass 2: write the symbols */
1669 for(y=0; y<h; y++){
/* give up when fewer than w*40 bytes of output space remain */
d06c75a8 1670 if(s->c.bytestream_end - s->c.bytestream < w*40){
0ecca7a4
MN
1671 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1672 return -1;
1673 }
791e7b83 1674 for(x=0; x<w; x++){
78486403 1675 int v, p=0;
6b2f6646 1676 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1677 v= src[x + y*stride];
791e7b83
MN
1678
1679 if(y){
a8d73e56 1680 t= src[x + (y-1)*stride];
791e7b83 1681 if(x){
a8d73e56 1682 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1683 }
1684 if(x + 1 < w){
a8d73e56 1685 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1686 }
1687 }
1688 if(x){
a8d73e56 1689 l= src[x - 1 + y*stride];
6b2f6646
MN
1690 /*if(x > 1){
1691 if(orientation==1) ll= src[y + (x-2)*stride];
1692 else ll= src[x - 2 + y*stride];
791e7b83
MN
1693 }*/
1694 }
78486403 1695 if(parent){
a8d73e56
MN
1696 int px= x>>1;
1697 int py= y>>1;
78486403
MN
1698 if(px<b->parent->width && py<b->parent->height)
1699 p= parent[px + py*2*stride];
1700 }
/* some neighbour is nonzero: code the "nonzero" flag with a context taken from the weighted neighbour magnitudes */
1701 if(/*ll|*/l|lt|t|rt|p){
1702 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
6b2f6646 1703
28869757 1704 put_rac(&s->c, &b->state[0][context], !!v);
791e7b83
MN
1705 }else{
/* all-zero neighbourhood: the run counter decides; a finished run implies a nonzero coefficient and the next run length is written */
1706 if(!run){
1707 run= runs[run_index++];
4f4e9633
MN
1708
1709 put_symbol2(&s->c, b->state[1], run, 3);
791e7b83
MN
1710 assert(v);
1711 }else{
1712 run--;
1713 assert(!v);
1714 }
1715 }
/* nonzero coefficient: magnitude-1, then the sign with a context built from the signs/magnitudes of l and t */
1716 if(v){
78486403 1717 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
538a3841
MN
1718 int l2= 2*ABS(l) + (l<0);
1719 int t2= 2*ABS(t) + (t<0);
6b2f6646 1720
0635cbfc 1721 put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
538a3841 1722 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
791e7b83
MN
1723 }
1724 }
1725 }
791e7b83 1726 }
0ecca7a4 1727 return 0;
791e7b83
MN
1728}
1729
0ecca7a4 1730static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
4f4e9633
MN
1731// encode_subband_qtree(s, b, src, parent, stride, orientation);
1732// encode_subband_z0run(s, b, src, parent, stride, orientation);
0ecca7a4 1733 return encode_subband_c0run(s, b, src, parent, stride, orientation);
4f4e9633
MN
1734// encode_subband_dzr(s, b, src, parent, stride, orientation);
1735}
1736
/**
 * Decodes the coefficients of one subband into the sparse list b->x_coeff.
 *
 * Each nonzero coefficient is stored as an (x, coeff) entry, where coeff is
 * 2*magnitude plus the decoded sign bit. Every row is closed by an entry
 * with x = w+1, and one more such marker follows the last row. The causal
 * neighbours (left, top-left, top, top-right) are taken from the entries of
 * the current and previous row, and the parent value from parent->x_coeff,
 * so the indices below have to advance in step with x and y.
 *
 * @param parent parent subband whose x_coeff list is already decoded, or NULL
 */
a0d1931c 1737static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
791e7b83
MN
1738 const int w= b->width;
1739 const int h= b->height;
1740 int x,y;
ea7d9cd4 1741
791e7b83 1742 if(1){
791e7b83 1743 int run;
/* index: next free entry; prev_index: read position in the previous row; prev2_index: start of the current row */
7b49c309
MN
1744 int index=0;
1745 int prev_index=-1;
1746 int prev2_index=0;
1747 int parent_index= 0;
1748 int prev_parent_index= 0;
791e7b83 1749
/* length of the first zero run */
4f4e9633 1750 run= get_symbol2(&s->c, b->state[1], 3);
791e7b83 1751 for(y=0; y<h; y++){
0cea8a03
MN
1752 int v=0;
1753 int lt=0, t=0, rt=0;
1754
/* seed rt with the previous row's coefficient at x=0, if it has one */
a0d1931c
Y
1755 if(y && b->x_coeff[prev_index].x == 0){
1756 rt= b->x_coeff[prev_index].coeff;
0cea8a03 1757 }
791e7b83 1758 for(x=0; x<w; x++){
0cea8a03
MN
1759 int p=0;
1760 const int l= v;
1761
/* slide the top neighbours one column to the right */
1762 lt= t; t= rt;
791e7b83 1763
ff765159 1764 if(y){
a0d1931c 1765 if(b->x_coeff[prev_index].x <= x)
ff765159 1766 prev_index++;
a0d1931c
Y
1767 if(b->x_coeff[prev_index].x == x + 1)
1768 rt= b->x_coeff[prev_index].coeff;
ff765159
MN
1769 else
1770 rt=0;
1771 }
78486403 1772 if(parent){
a0d1931c 1773 if(x>>1 > parent->x_coeff[parent_index].x){
7b49c309
MN
1774 parent_index++;
1775 }
a0d1931c
Y
1776 if(x>>1 == parent->x_coeff[parent_index].x){
1777 p= parent->x_coeff[parent_index].coeff;
ff765159 1778 }
78486403
MN
1779 }
/* stored values are 2*magnitude+sign, hence the >>1 / &~1 when forming the context */
1780 if(/*ll|*/l|lt|t|rt|p){
538a3841 1781 int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
6b2f6646
28869757 1783 v=get_rac(&s->c, &b->state[0][context]);
791e7b83
MN
1784 }else{
/* all-zero neighbourhood: a finished run means a nonzero coefficient here and a new run length follows */
1785 if(!run){
4f4e9633 1786 run= get_symbol2(&s->c, b->state[1], 3);
791e7b83
MN
1787 v=1;
1788 }else{
1789 run--;
1790 v=0;
3c1adccd 1791
/* skip ahead inside the run, limited by the next entry of the previous row and of the parent row */
7b49c309
MN
1792 if(y && parent){
1793 int max_run;
7b49c309 1794
a0d1931c
Y
1795 max_run= FFMIN(run, b->x_coeff[prev_index].x - x - 2);
1796 max_run= FFMIN(max_run, 2*parent->x_coeff[parent_index].x - x - 1);
7b49c309
MN
1797 x+= max_run;
1798 run-= max_run;
3c1adccd 1799 }
791e7b83
MN
1800 }
1801 }
/* nonzero coefficient: decode magnitude-1 and the sign bit, store as 2*magnitude + sign */
1802 if(v){
538a3841
MN
1803 int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1804 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1805 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1806
a0d1931c
Y
1807 b->x_coeff[index].x=x;
1808 b->x_coeff[index++].coeff= v;
7b49c309
MN
1809 }
1810 }
a0d1931c 1811 b->x_coeff[index++].x= w+1; //end marker
/* the row just written becomes the previous row for the next y */
7b49c309
MN
1812 prev_index= prev2_index;
1813 prev2_index= index;
1814
/* two rows share one parent row: after an odd row move past the parent row's end marker, after an even row rewind to its start */
1815 if(parent){
7b49c309 1816 if(y&1){
a0d1931c
Y
1817 while(parent->x_coeff[parent_index].x != parent->width+1)
1818 parent_index++;
1819 parent_index++;
7b49c309
MN
1820 prev_parent_index= parent_index;
1821 }else{
1822 parent_index= prev_parent_index;
791e7b83
MN
1823 }
1824 }
1825 }
a0d1931c
Y
1826
1827 b->x_coeff[index++].x= w+1; //end marker
1828 }
1829}
1830
1831static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1832 const int w= b->width;
1833 int x,y;
c97de57c
MN
1834 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
1835 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
a0d1931c
Y
1836 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1837 int new_index = 0;
1838
1839 START_TIMER
1840
1841 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1842 qadd= 0;
1843 qmul= 1<<QEXPSHIFT;
1844 }
1845
1846 /* If we are on the second or later slice, restore our index. */
1847 if (start_y != 0)
1848 new_index = save_state[0];
1849
791e7b83 1850
a0d1931c
Y
1851 for(y=start_y; y<h; y++){
1852 int x = 0;
1853 int v;
1854 DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1855 memset(line, 0, b->width*sizeof(DWTELEM));
1856 v = b->x_coeff[new_index].coeff;
1857 x = b->x_coeff[new_index++].x;
1858 while(x < w)
1859 {
538a3841
MN
1860 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1861 register int u= -(v&1);
1862 line[x] = (t^u) - u;
1863
a0d1931c
Y
1864 v = b->x_coeff[new_index].coeff;
1865 x = b->x_coeff[new_index++].x;
1866 }
791e7b83 1867 }
a0d1931c
Y
1868 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1869 STOP_TIMER("decode_subband")
1870 }
1871
1872 /* Save our variables for the next slice. */
1873 save_state[0] = new_index;
1874
1875 return;
791e7b83
MN
1876}
1877
1878static void reset_contexts(SnowContext *s){
1879 int plane_index, level, orientation;
1880
19aa028d 1881 for(plane_index=0; plane_index<3; plane_index++){
791e7b83
MN
1882 for(level=0; level<s->spatial_decomposition_count; level++){
1883 for(orientation=level ? 1:0; orientation<4; orientation++){
28869757 1884 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
791e7b83
MN
1885 }
1886 }
1887 }
28869757
MN
1888 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1889 memset(s->block_state, MID_STATE, sizeof(s->block_state));
155ec6ed
MN
1890}
1891
1892static int alloc_blocks(SnowContext *s){
1893 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1894 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1895
1896 s->b_width = w;
1897 s->b_height= h;
1898
1899 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1900 return 0;
1901}
1902
28869757
MN
1903static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1904 uint8_t *bytestream= d->bytestream;
1905 uint8_t *bytestream_start= d->bytestream_start;
155ec6ed 1906 *d= *s;
28869757
MN
1907 d->bytestream= bytestream;
1908 d->bytestream_start= bytestream_start;
155ec6ed
MN
1909}
1910
//near copy & paste from dsputil, FIXME
/**
 * Sums all samples of a w x w block.
 *
 * @param pix       top-left sample of the block (only read)
 * @param line_size distance between the starts of two consecutive rows
 * @param w         width and height of the block
 * @return sum of the w*w samples
 */
static int pix_sum(const uint8_t * pix, int line_size, int w)
{
    int s, i, j;

    s = 0;
    for (i = 0; i < w; i++) {
        for (j = 0; j < w; j++) {
            s += pix[0];
            pix ++;
        }
        pix += line_size - w; // skip to the start of the next row
    }
    return s;
}
1926
1927//near copy & paste from dsputil, FIXME
1928static int pix_norm1(uint8_t * pix, int line_size, int w)
1929{
1930 int s, i, j;
1931 uint32_t *sq = squareTbl + 256;
1932
1933 s = 0;
1934 for (i = 0; i < w; i++) {
1935 for (j = 0; j < w; j ++) {
1936 s += sq[pix[0]];
1937 pix ++;
1938 }
1939 pix += line_size - w;
1940 }
1941 return s;
1942}
1943
1944static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1945 const int w= s->b_width << s->block_max_depth;
1946 const int rem_depth= s->block_max_depth - level;
1947 const int index= (x + y*w) << rem_depth;
1948 const int block_w= 1<<rem_depth;
1949 BlockNode block;
1950 int i,j;
1951
1952 block.color[0]= l;
1953 block.color[1]= cb;
1954 block.color[2]= cr;
1955 block.mx= mx;
1956 block.my= my;
1957 block.type= type;
1958 block.level= level;
1959
1960 for(j=0; j<block_w; j++){
1961 for(i=0; i<block_w; i++){
1962 s->block[index + i + j*w]= block;
1963 }
1964 }
1965}
1966
1967static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1968 const int offset[3]= {
1969 y*c-> stride + x,
1970 ((y*c->uvstride + x)>>1),
1971 ((y*c->uvstride + x)>>1),
1972 };
1973 int i;
1974 for(i=0; i<3; i++){
1975 c->src[0][i]= src [i];
1976 c->ref[0][i]= ref [i] + offset[i];
1977 }
1978 assert(!ref_index);
1979}
1980
1981//FIXME copy&paste
1982#define P_LEFT P[1]
1983#define P_TOP P[2]
1984#define P_TOPRIGHT P[3]
1985#define P_MEDIAN P[4]
1986#define P_MV1 P[9]
1987#define FLAG_QPEL 1 //must be 1
1988
1989static int encode_q_branch(SnowContext *s, int level, int x, int y){
1990 uint8_t p_buffer[1024];
1991 uint8_t i_buffer[1024];
1992 uint8_t p_state[sizeof(s->block_state)];
1993 uint8_t i_state[sizeof(s->block_state)];
28869757
MN
1994 RangeCoder pc, ic;
1995 uint8_t *pbbak= s->c.bytestream;
1996 uint8_t *pbbak_start= s->c.bytestream_start;
155ec6ed
MN
1997 int score, score2, iscore, i_len, p_len, block_s, sum;
1998 const int w= s->b_width << s->block_max_depth;
1999 const int h= s->b_height << s->block_max_depth;
2000 const int rem_depth= s->block_max_depth - level;
2001 const int index= (x + y*w) << rem_depth;
2002 const int block_w= 1<<(LOG2_MB_SIZE - level);
2003 static BlockNode null_block= { //FIXME add border maybe
2004 .color= {128,128,128},
2005 .mx= 0,
2006 .my= 0,
2007 .type= 0,
2008 .level= 0,
2009 };
2010 int trx= (x+1)<<rem_depth;
2011 int try= (y+1)<<rem_depth;
2012 BlockNode *left = x ? &s->block[index-1] : &null_block;
2013 BlockNode *top = y ? &s->block[index-w] : &null_block;
2014 BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2015 BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2016 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2017 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2018 int pl = left->color[0];
2019 int pcb= left->color[1];
2020 int pcr= left->color[2];
2021 int pmx= mid_pred(left->mx, top->mx, tr->mx);
2022 int pmy= mid_pred(left->my, top->my, tr->my);
2023 int mx=0, my=0;
2024 int l,cr,cb, i;
2025 const int stride= s->current_picture.linesize[0];
2026 const int uvstride= s->current_picture.linesize[1];
2027 const int instride= s->input_picture.linesize[0];
2028 const int uvinstride= s->input_picture.linesize[1];
2029 uint8_t *new_l = s->input_picture.data[0] + (x + y* instride)*block_w;
2030 uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2;
2031 uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2;
2032 uint8_t current_mb[3][stride*block_w];
2033 uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]};
2034 int P[10][2];
2035 int16_t last_mv[3][2];
2036 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2037 const int shift= 1+qpel;
2038 MotionEstContext *c= &s->m.me;
2039 int mx_context= av_log2(2*ABS(left->mx - top->mx));
2040 int my_context= av_log2(2*ABS(left->my - top->my));
2041 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2042
2043 assert(sizeof(s->block_state) >= 256);
2044 if(s->keyframe){
2045 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2046 return 0;
2047 }
2048
2049 //FIXME optimize
2050 for(i=0; i<block_w; i++)
2051 memcpy(&current_mb[0][0] + stride*i, new_l + instride*i, block_w);
2052 for(i=0; i<block_w>>1; i++)
2053 memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1);
2054 for(i=0; i<block_w>>1; i++)
2055 memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1);
2056
2057// clip predictors / edge ?
2058
2059 P_LEFT[0]= left->mx;
2060 P_LEFT[1]= left->my;
2061 P_TOP [0]= top->mx;
2062 P_TOP [1]= top->my;
2063 P_TOPRIGHT[0]= tr->mx;
2064 P_TOPRIGHT[1]= tr->my;
2065
2066 last_mv[0][0]= s->block[index].mx;
2067 last_mv[0][1]= s->block[index].my;
2068 last_mv[1][0]= right->mx;
2069 last_mv[1][1]= right->my;
2070 last_mv[2][0]= bottom->mx;
2071 last_mv[2][1]= bottom->my;
2072
2073 s->m.mb_stride=2;
2074 s->m.mb_x=
2075 s->m.mb_y= 0;
2076 s->m.me.skip= 0;
2077
2078 init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
2079
2080 assert(s->m.me. stride == stride);
2081 assert(s->m.me.uvstride == uvstride);
2082
2083 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2084 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2085 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2086 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2087
ff158dc9
MN
2088 c->xmin = - x*block_w - 16+2;
2089 c->ymin = - y*block_w - 16+2;
2090 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2091 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
155ec6ed
MN
2092
2093 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
2094 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
2095 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2096 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2097 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2098 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2099 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2100
2101 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2102 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2103
2104 if (!y) {
2105 c->pred_x= P_LEFT[0];
2106 c->pred_y= P_LEFT[1];
2107 } else {
2108 c->pred_x = P_MEDIAN[0];
2109 c->pred_y = P_MEDIAN[1];
2110 }
2111
2112 score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv,
2113 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2114
2115 assert(mx >= c->xmin);
2116 assert(mx <= c->xmax);
2117 assert(my >= c->ymin);
2118 assert(my <= c->ymax);
2119
2120 score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2121 score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2122 //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
2123
2124 // subpel search
2125 pc= s->c;
28869757
MN
2126 pc.bytestream_start=
2127 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
155ec6ed
MN
2128 memcpy(p_state, s->block_state, sizeof(s->block_state));
2129
2130 if(level!=s->block_max_depth)
28869757
MN
2131 put_rac(&pc, &p_state[4 + s_context], 1);
2132 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
155ec6ed
MN
2133 put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
2134 put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
28869757
MN
2135 p_len= pc.bytestream - pc.bytestream_start;
2136 score += (s->lambda2*(p_len*8
2137 + (pc.outstanding_count - s->c.outstanding_count)*8
2138 + (-av_log2(pc.range) + av_log2(s->c.range))
2139 ))>>FF_LAMBDA_SHIFT;
155ec6ed
MN
2140
2141 block_s= block_w*block_w;
2142 sum = pix_sum(&current_mb[0][0], stride, block_w);
2143 l= (sum + block_s/2)/block_s;
2144 iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s;
2145
2146 block_s= block_w*block_w>>2;
2147 sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1);
2148 cb= (sum + block_s/2)/block_s;
2149// iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2150 sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1);
2151 cr= (sum + block_s/2)/block_s;
2152// iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2153
2154 ic= s->c;
28869757
MN
2155 ic.bytestream_start=
2156 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
155ec6ed
MN
2157 memcpy(i_state, s->block_state, sizeof(s->block_state));
2158 if(level!=s->block_max_depth)
28869757
MN
2159 put_rac(&ic, &i_state[4 + s_context], 1);
2160 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
155ec6ed
MN
2161 put_symbol(&ic, &i_state[32], l-pl , 1);
2162 put_symbol(&ic, &i_state[64], cb-pcb, 1);
2163 put_symbol(&ic, &i_state[96], cr-pcr, 1);
28869757
MN
2164 i_len= ic.bytestream - ic.bytestream_start;
2165 iscore += (s->lambda2*(i_len*8
2166 + (ic.outstanding_count - s->c.outstanding_count)*8
2167 + (-av_log2(ic.range) + av_log2(s->c.range))
2168 ))>>FF_LAMBDA_SHIFT;
155ec6ed
MN
2169
2170// assert(score==256*256*256*64-1);
2171 assert(iscore < 255*255*256 + s->lambda2*10);
2172 assert(iscore >= 0);
2173 assert(l>=0 && l<=255);
2174 assert(pl>=0 && pl<=255);
2175
2176 if(level==0){
2177 int varc= iscore >> 8;
2178 int vard= score >> 8;
2179 if (vard <= 64 || vard < varc)
2180 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2181 else
2182 c->scene_change_score+= s->m.qscale;
2183 }
2184
2185 if(level!=s->block_max_depth){
28869757 2186 put_rac(&s->c, &s->block_state[4 + s_context], 0);
155ec6ed
MN
2187 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2188 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2189 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2190 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2191 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2192
2193 if(score2 < score && score2 < iscore)
2194 return score2;
2195 }
2196
2197 if(iscore < score){
28869757 2198 memcpy(pbbak, i_buffer, i_len);
155ec6ed 2199 s->c= ic;
28869757
MN
2200 s->c.bytestream_start= pbbak_start;
2201 s->c.bytestream= pbbak + i_len;
155ec6ed
MN
2202 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
2203 memcpy(s->block_state, i_state, sizeof(s->block_state));
2204 return iscore;
2205 }else{
28869757 2206 memcpy(pbbak, p_buffer, p_len);
155ec6ed 2207 s->c= pc;
28869757
MN
2208 s->c.bytestream_start= pbbak_start;
2209 s->c.bytestream= pbbak + p_len;
155ec6ed
MN
2210 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
2211 memcpy(s->block_state, p_state, sizeof(s->block_state));
2212 return score;
2213 }
2214}
2215
2216static void decode_q_branch(SnowContext *s, int level, int x, int y){
2217 const int w= s->b_width << s->block_max_depth;
155ec6ed
MN
2218 const int rem_depth= s->block_max_depth - level;
2219 const int index= (x + y*w) << rem_depth;
2220 static BlockNode null_block= { //FIXME add border maybe
2221 .color= {128,128,128},
2222 .mx= 0,
2223 .my= 0,
2224 .type= 0,
2225 .level= 0,
2226 };
2227 int trx= (x+1)<<rem_depth;
155ec6ed
MN
2228 BlockNode *left = x ? &s->block[index-1] : &null_block;
2229 BlockNode *top = y ? &s->block[index-w] : &null_block;
2230 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2231 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2232 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2233
2234 if(s->keyframe){
2235 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
2236 return;
2237 }
2238
28869757 2239 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
155ec6ed
MN
2240 int type;
2241 int l = left->color[0];
2242 int cb= left->color[1];
2243 int cr= left->color[2];
2244 int mx= mid_pred(left->mx, top->mx, tr->mx);
2245 int my= mid_pred(left->my, top->my, tr->my);
2246 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
2247 int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
2248
28869757 2249 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
155ec6ed
MN
2250
2251 if(type){
2252 l += get_symbol(&s->c, &s->block_state[32], 1);
2253 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2254 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2255 }else{
2256 mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
2257 my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
2258 }
2259 set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
2260 }else{
2261 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2262 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2263 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2264 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2265 }
2266}
2267
2268static void encode_blocks(SnowContext *s){
2269 int x, y;
2270 int w= s->b_width;
2271 int h= s->b_height;
2272
2273 for(y=0; y<h; y++){
d06c75a8 2274 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
0ecca7a4
MN
2275 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2276 return;
2277 }
155ec6ed
MN
2278 for(x=0; x<w; x++){
2279 encode_q_branch(s, 0, x, y);
2280 }
2281 }
2282}
2283
2284static void decode_blocks(SnowContext *s){
2285 int x, y;
2286 int w= s->b_width;
2287 int h= s->b_height;
2288
2289 for(y=0; y<h; y++){
2290 for(x=0; x<w; x++){
2291 decode_q_branch(s, 0, x, y);
2292 }
2293 }
791e7b83
MN
2294}
2295
2296static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2297 int x, y;
3924dac4 2298START_TIMER
791e7b83
MN
2299 for(y=0; y < b_h+5; y++){
2300 for(x=0; x < b_w; x++){
3924dac4
MN
2301 int a0= src[x ];
2302 int a1= src[x + 1];
2303 int a2= src[x + 2];
2304 int a3= src[x + 3];
2305 int a4= src[x + 4];
2306 int a5= src[x + 5];
791e7b83
MN
2307// int am= 9*(a1+a2) - (a0+a3);
2308 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2309// int am= 18*(a2+a3) - 2*(a1+a4);
2310// int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2311// int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2312
2313// if(b_w==16) am= 8*(a1+a2);
2314
3924dac4
MN
2315 if(dx<8) tmp[x]= (32*a2*( 8-dx) + am* dx + 128)>>8;
2316 else tmp[x]= ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
791e7b83
MN
2317
2318/* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
2319 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
2320 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
2321 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2322 }
3924dac4
MN
2323 tmp += stride;
2324 src += stride;
791e7b83 2325 }
3924dac4
MN
2326 tmp -= (b_h+5)*stride;
2327
791e7b83
MN
2328 for(y=0; y < b_h; y++){
2329 for(x=0; x < b_w; x++){
3924dac4
MN
2330 int a0= tmp[x + 0*stride];
2331 int a1= tmp[x + 1*stride];
2332 int a2= tmp[x + 2*stride];
2333 int a3= tmp[x + 3*stride];
2334 int a4= tmp[x + 4*stride];
2335 int a5= tmp[x + 5*stride];
791e7b83
MN
2336 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2337// int am= 18*(a2+a3) - 2*(a1+a4);
2338/* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2339 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2340
2341// if(b_w==16) am= 8*(a1+a2);
2342
3924dac4
MN
2343 if(dy<8) dst[x]= (32*a2*( 8-dy) + am* dy + 128)>>8;
2344 else dst[x]= ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
791e7b83
MN
2345
2346/* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
2347 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
2348 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
2349 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2350 }
3924dac4
MN
2351 dst += stride;
2352 tmp += stride;
791e7b83 2353 }
3924dac4 2354STOP_TIMER("mc_block")
791e7b83
MN
2355}
2356
/*
 * mca(dx, dy, b_w) defines mc_block_hpel<dx><dy><b_w>(), a fixed-phase wrapper
 * around mc_block() for square b_w x b_w blocks. The wrapper provides the
 * scratch buffer, asserts that h equals b_w and hands mc_block() a source
 * pointer moved 2 samples left and 2 rows up.
 */
#define mca(dx,dy,b_w)\
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
    uint8_t tmp[stride*(b_w+5)];\
    assert(h==b_w);\
    mc_block(dst, src - 2 - 2*stride, tmp, stride, b_w, b_w, dx, dy);\
}

/* 16x16 variants */
mca( 0, 0,16)
mca( 8, 0,16)
mca( 0, 8,16)
mca( 8, 8,16)
/* 8x8 variants */
mca( 0, 0,8)
mca( 8, 0,8)
mca( 0, 8,8)
mca( 8, 8,8)
791e7b83 2372
ff158dc9
MN
2373static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2374 if(block->type){
2375 int x, y;
2376 const int color= block->color[plane_index];
2377 for(y=0; y < b_h; y++){
2378 for(x=0; x < b_w; x++){
2379 dst[x + y*stride]= color;
2380 }
2381 }
2382 }else{
2383 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2384 int mx= block->mx*scale;
2385 int my= block->my*scale;
ec697587
MN
2386 const int dx= mx&15;
2387 const int dy= my&15;
ff158dc9
MN
2388 sx += (mx>>4) - 2;
2389 sy += (my>>4) - 2;
2390 src += sx + sy*stride;
2391 if( (unsigned)sx >= w - b_w - 4
2392 || (unsigned)sy >= h - b_h - 4){
2393 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2394 src= tmp + MB_SIZE;
2395 }
ec697587
MN
2396 if((dx&3) || (dy&3) || b_w!=b_h || (b_w!=4 && b_w!=8 && b_w!=16))
2397 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2398 else
2399 s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
ff158dc9
MN
2400 }
2401}
2402
2403static always_inline int same_block(BlockNode *a, BlockNode *b){
2404 return !((a->mx - b->mx) | (a->my - b->my) | a->type | b->type);
2405}
2406
2407//FIXME name clenup (b_w, block_w, b_width stuff)
a0d1931c
Y
2408static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2409 DWTELEM * dst = NULL;
2410 const int b_width = s->b_width << s->block_max_depth;
2411 const int b_height= s->b_height << s->block_max_depth;
2412 const int b_stride= b_width;
2413 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2414 BlockNode *rt= lt+1;
2415 BlockNode *lb= lt+b_stride;
2416 BlockNode *rb= lb+1;
2417 uint8_t *block[4];
2418 uint8_t tmp[src_stride*(b_h+5)]; //FIXME align
2419 int x,y;
2420
2421 if(b_x<0){
2422 lt= rt;
2423 lb= rb;
2424 }else if(b_x + 1 >= b_width){
2425 rt= lt;
2426 rb= lb;
2427 }
2428 if(b_y<0){
2429 lt= lb;
2430 rt= rb;
2431 }else if(b_y + 1 >= b_height){
2432 lb= lt;
2433 rb= rt;
2434 }
2435
2436 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2437 obmc -= src_x;
2438 b_w += src_x;
2439 src_x=0;
2440 }else if(src_x + b_w > w){
2441 b_w = w - src_x;
2442 }
2443 if(src_y<0){
2444 obmc -= src_y*obmc_stride;
2445 b_h += src_y;
2446 src_y=0;
2447 }else if(src_y + b_h> h){
2448 b_h = h - src_y;
2449 }
2450
2451 if(b_w<=0 || b_h<=0) return;
2452
2453assert(src_stride > 7*MB_SIZE);
2454// old_dst += src_x + src_y*dst_stride;
2455 dst8+= src_x + src_y*src_stride;
2456// src += src_x + src_y*src_stride;
2457
2458 block[0]= tmp+3*MB_SIZE;
2459 pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2460
2461 if(same_block(lt, rt)){
2462 block[1]= block[0];
2463 }else{
2464 block[1]= tmp + 4*MB_SIZE;
2465 pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2466 }
2467
2468 if(same_block(lt, lb)){
2469 block[2]= block[0];
2470 }else if(same_block(rt, lb)){
2471 block[2]= block[1];
2472 }else{
2473 block[2]= tmp+5*MB_SIZE;
2474 pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2475 }
2476
2477 if(same_block(lt, rb) ){
2478 block[3]= block[0];
2479 }else if(same_block(rt, rb)){
2480 block[3]= block[1];
2481 }else if(same_block(lb, rb)){
2482 block[3]= block[2];
2483 }else{
2484 block[3]= tmp+6*MB_SIZE;
2485 pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2486 }
2487#if 0
2488 for(y=0; y<b_h; y++){
2489 for(x=0; x<b_w; x++){
2490 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2491 if(add) dst[x + y*dst_stride] += v;
2492 else dst[x + y*dst_stride] -= v;
2493 }
2494 }
2495 for(y=0; y<b_h; y++){
2496 uint8_t *obmc2= obmc + (obmc_stride>>1);
2497 for(x=0; x<b_w; x++){
2498 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2499 if(add) dst[x + y*dst_stride] += v;
2500 else dst[x + y*dst_stride] -= v;
2501 }
2502 }
2503 for(y=0; y<b_h; y++){
2504 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2505 for(x=0; x<b_w; x++){
2506 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2507 if(add) dst[x + y*dst_stride] += v;
2508 else dst[x + y*dst_stride] -= v;
2509 }
2510 }
2511 for(y=0; y<b_h; y++){
2512 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2513 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2514 for(x=0; x<b_w; x++){
2515 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2516 if(add) dst[x + y*dst_stride] += v;
2517 else dst[x + y*dst_stride] -= v;
2518 }
2519 }
2520#else
2521{
2522
2523 START_TIMER
2524
2525 int block_index = 0;
2526 for(y=0; y<b_h; y++){
2527 //FIXME ugly missue of obmc_stride
2528 uint8_t *obmc1= obmc + y*obmc_stride;
2529 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2530 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2531 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2532 dst = slice_buffer_get_line(sb, src_y + y);
2533 for(x=0; x<b_w; x++){
2534 int v= obmc1[x] * block[3][x + y*src_stride]
2535 +obmc2[x] * block[2][x + y*src_stride]
2536 +obmc3[x] * block[1][x + y*src_stride]
2537 +obmc4[x] * block[0][x + y*src_stride];
2538
2539 v <<= 8 - LOG2_OBMC_MAX;
2540 if(FRAC_BITS != 8){
2541 v += 1<<(7 - FRAC_BITS);
2542 v >>= 8 - FRAC_BITS;
2543 }
2544 if(add){
2545// v += old_dst[x + y*dst_stride];
2546 v += dst[x + src_x];
2547 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2548 if(v&(~255)) v= ~(v>>31);
2549 dst8[x + y*src_stride] = v;
2550 }else{
2551// old_dst[x + y*dst_stride] -= v;
2552 dst[x + src_x] -= v;
2553 }
2554 }
2555 }
2556 STOP_TIMER("Inner add y block")
2557}
2558#endif
2559}
2560
2561//FIXME name clenup (b_w, block_w, b_width stuff)
715a97f0 2562static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
ff158dc9
MN
2563 const int b_width = s->b_width << s->block_max_depth;
2564 const int b_height= s->b_height << s->block_max_depth;
2565 const int b_stride= b_width;
2566 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2567 BlockNode *rt= lt+1;
2568 BlockNode *lb= lt+b_stride;
2569 BlockNode *rb= lb+1;
2570 uint8_t *block[4];
2571 uint8_t tmp[src_stride*(b_h+5)]; //FIXME align
791e7b83
MN
2572 int x,y;
2573
ff158dc9
MN
2574 if(b_x<0){
2575 lt= rt;
2576 lb= rb;
2577 }else if(b_x + 1 >= b_width){
2578 rt= lt;
2579 rb= lb;
791e7b83 2580 }
ff158dc9
MN
2581 if(b_y<0){
2582 lt= lb;
2583 rt= rb;
2584 }else if(b_y + 1 >= b_height){
2585 lb= lt;
2586 rb= rt;
2587 }
2588
2589 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2590 obmc -= src_x;
2591 b_w += src_x;
2592 src_x=0;
2593 }else if(src_x + b_w > w){
2594 b_w = w - src_x;
2595 }
2596 if(src_y<0){
2597 obmc -= src_y*obmc_stride;
2598 b_h += src_y;
2599 src_y=0;
2600 }else if(src_y + b_h> h){
2601 b_h = h - src_y;
791e7b83 2602 }
620ab797 2603
ff158dc9 2604 if(b_w<=0 || b_h<=0) return;
155ec6ed 2605
ff158dc9
MN
2606assert(src_stride > 7*MB_SIZE);
2607 dst += src_x + src_y*dst_stride;
715a97f0 2608 dst8+= src_x + src_y*src_stride;
ff158dc9
MN
2609// src += src_x + src_y*src_stride;
2610
2611 block[0]= tmp+3*MB_SIZE;
2612 pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2613
2614 if(same_block(lt, rt)){
2615 block[1]= block[0];
791e7b83 2616 }else{
ff158dc9
MN
2617 block[1]= tmp + 4*MB_SIZE;
2618 pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2619 }
2620
2621 if(same_block(lt, lb)){
2622 block[2]= block[0];
2623 }else if(same_block(rt, lb)){
2624 block[2]= block[1];
2625 }else{
2626 block[2]= tmp+5*MB_SIZE;
2627 pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2628 }
791e7b83 2629
ff158dc9
MN
2630 if(same_block(lt, rb) ){
2631 block[3]= block[0];
2632 }else if(same_block(rt, rb)){
2633 block[3]= block[1];
2634 }else if(same_block(lb, rb)){
2635 block[3]= block[2];
2636 }else{
2637 block[3]= tmp+6*MB_SIZE;
2638 pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2639 }
2640#if 0
2641 for(y=0; y<b_h; y++){
2642 for(x=0; x<b_w; x++){
2643 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2644 if(add) dst[x + y*dst_stride] += v;
2645 else dst[x + y*dst_stride] -= v;
2646 }
2647 }
2648 for(y=0; y<b_h; y++){
2649 uint8_t *obmc2= obmc + (obmc_stride>>1);
2650 for(x=0; x<b_w; x++){
2651 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2652 if(add) dst[x + y*dst_stride] += v;
2653 else dst[x + y*dst_stride] -= v;
2654 }
2655 }
2656 for(y=0; y<b_h; y++){
2657 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2658 for(x=0; x<b_w; x++){
2659 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2660 if(add) dst[x + y*dst_stride] += v;
2661 else dst[x + y*dst_stride] -= v;
2662 }
2663 }
2664 for(y=0; y<b_h; y++){
2665 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2666 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2667 for(x=0; x<b_w; x++){
2668 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2669 if(add) dst[x + y*dst_stride] += v;
2670 else dst[x + y*dst_stride] -= v;
2671 }
2672 }
2673#else
2674 for(y=0; y<b_h; y++){
2675 //FIXME ugly missue of obmc_stride
2676 uint8_t *obmc1= obmc + y*obmc_stride;
2677 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2678 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2679 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2680 for(x=0; x<b_w; x++){
2681 int v= obmc1[x] * block[3][x + y*src_stride]
2682 +obmc2[x] * block[2][x + y*src_stride]
2683 +obmc3[x] * block[1][x + y*src_stride]
2684 +obmc4[x] * block[0][x + y*src_stride];
715a97f0
MN
2685
2686 v <<= 8 - LOG2_OBMC_MAX;
034aff03
MN
2687 if(FRAC_BITS != 8){
2688 v += 1<<(7 - FRAC_BITS);
2689 v >>= 8 - FRAC_BITS;
2690 }
715a97f0
MN
2691 if(add){
2692 v += dst[x + y*dst_stride];
2693 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2694 if(v&(~255)) v= ~(v>>31);
2695 dst8[x + y*src_stride] = v;
2696 }else{
2697 dst[x + y*dst_stride] -= v;
2698 }
791e7b83
MN
2699 }
2700 }
ff158dc9 2701#endif
791e7b83
MN
2702}
2703
a0d1931c
Y
2704static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2705 Plane *p= &s->plane[plane_index];
2706 const int mb_w= s->b_width << s->block_max_depth;
2707 const int mb_h= s->b_height << s->block_max_depth;
2708 int x, y, mb_x;
2709 int block_size = MB_SIZE >> s->block_max_depth;
2710 int block_w = plane_index ? block_size/2 : block_size;
2711 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2712 int obmc_stride= plane_index ? block_size : 2*block_size;
2713 int ref_stride= s->current_picture.linesize[plane_index];
2714 uint8_t *ref = s->last_picture.data[plane_index];
2715 uint8_t *dst8= s->current_picture.data[plane_index];
2716 int w= p->width;
2717 int h= p->height;
2718 START_TIMER
2719
2720 if(s->keyframe || (s->avctx->debug&512)){
2721 if(mb_y==mb_h)
2722 return;
2723
2724 if(add){
2725 for(y=block_w*mb_y; y<block_w*(mb_y+1); y++)
2726 {
2727// DWTELEM * line = slice_buffer_get_line(sb, y);
2728 DWTELEM * line = sb->line[y];
2729 for(x=0; x<w; x++)
2730 {
2731// int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2732 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2733 v >>= FRAC_BITS;
2734 if(v&(~255)) v= ~(v>>31);
2735 dst8[x + y*ref_stride]= v;
2736 }
2737 }
2738 }else{
2739 for(y=block_w*mb_y; y<block_w*(mb_y+1); y++)
2740 {
2741// DWTELEM * line = slice_buffer_get_line(sb, y);
2742 DWTELEM * line = sb->line[y];
2743 for(x=0; x<w; x++)
2744 {
2745 line[x] -= 128 << FRAC_BITS;
2746// buf[x + y*w]-= 128<<FRAC_BITS;
2747 }
2748 }
2749 }
2750
2751 return;
2752 }
2753
2754 for(mb_x=0; mb_x<=mb_w; mb_x++){
2755 START_TIMER
2756
2757 add_yblock_buffered(s, sb, old_buffer, dst8, ref, obmc,
2758 block_w*mb_x - block_w/2,
2759 block_w*mb_y - block_w/2,
2760 block_w, block_w,
2761 w, h,
2762 w, ref_stride, obmc_stride,
2763 mb_x - 1, mb_y - 1,
2764 add, plane_index);
2765
2766 STOP_TIMER("add_yblock")
2767 }
2768
2769 STOP_TIMER("predict_slice")
2770}
2771
f9e6ebf7 2772static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
791e7b83 2773 Plane *p= &s->plane[plane_index];
155ec6ed
MN
2774 const int mb_w= s->b_width << s->block_max_depth;
2775 const int mb_h= s->b_height << s->block_max_depth;
f9e6ebf7 2776 int x, y, mb_x;
155ec6ed
MN
2777 int block_size = MB_SIZE >> s->block_max_depth;
2778 int block_w = plane_index ? block_size/2 : block_size;
ff158dc9 2779 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
155ec6ed 2780 int obmc_stride= plane_index ? block_size : 2*block_size;
715a97f0 2781 int ref_stride= s->current_picture.linesize[plane_index];
791e7b83 2782 uint8_t *ref = s->last_picture.data[plane_index];
715a97f0 2783 uint8_t *dst8= s->current_picture.data[plane_index];
791e7b83
MN
2784 int w= p->width;
2785 int h= p->height;
fff6d4ea 2786 START_TIMER
791e7b83 2787
ff158dc9 2788 if(s->keyframe || (s->avctx->debug&512)){
f9e6ebf7
LM
2789 if(mb_y==mb_h)
2790 return;
2791
715a97f0 2792 if(add){
f9e6ebf7 2793 for(y=block_w*mb_y; y<block_w*(mb_y+1); y++){
715a97f0
MN
2794 for(x=0; x<w; x++){
2795 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2796 v >>= FRAC_BITS;
2797 if(v&(~255)) v= ~(v>>31);
2798 dst8[x + y*ref_stride]= v;
2799 }
2800 }
2801 }else{
f9e6ebf7 2802 for(y=block_w*mb_y; y<block_w*(mb_y+1); y++){
715a97f0
MN
2803 for(x=0; x<w; x++){
2804 buf[x + y*w]-= 128<<FRAC_BITS;
2805 }
ff158dc9 2806 }
791e7b83 2807 }
ff158dc9
MN
2808
2809 return;
791e7b83
MN
2810 }
2811
ff158dc9 2812 for(mb_x=0; mb_x<=mb_w; mb_x++){
fff6d4ea 2813 START_TIMER
ff158dc9 2814
715a97f0 2815 add_yblock(s, buf, dst8, ref, obmc,
ff158dc9 2816 block_w*mb_x - block_w/2,
791e7b83 2817 block_w*mb_y - block_w/2,
ff158dc9 2818 block_w, block_w,
791e7b83 2819 w, h,
ff158dc9
MN
2820 w, ref_stride, obmc_stride,
2821 mb_x - 1, mb_y - 1,
2822 add, plane_index);
2823
2824 STOP_TIMER("add_yblock")
791e7b83 2825 }
fff6d4ea 2826
f9e6ebf7
LM
2827 STOP_TIMER("predict_slice")
2828}
2829
2830static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2831 const int mb_h= s->b_height << s->block_max_depth;
2832 int mb_y;
2833 for(mb_y=0; mb_y<=mb_h; mb_y++)
2834 predict_slice(s, buf, plane_index, add, mb_y);
791e7b83
MN
2835}
2836
2837static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
2838 const int level= b->level;
2839 const int w= b->width;
2840 const int h= b->height;
c97de57c
MN
2841 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
2842 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
da66b631
MN
2843 int x,y, thres1, thres2;
2844 START_TIMER
791e7b83 2845
93fbdb5a
MN
2846 if(s->qlog == LOSSLESS_QLOG) return;
2847
791e7b83 2848 bias= bias ? 0 : (3*qmul)>>3;
da66b631
MN
2849 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
2850 thres2= 2*thres1;
791e7b83
MN
2851
2852 if(!bias){
2853 for(y=0; y<h; y++){
2854 for(x=0; x<w; x++){
da66b631
MN
2855 int i= src[x + y*stride];
2856
2857 if((unsigned)(i+thres1) > thres2){
2858 if(i>=0){
2859 i<<= QEXPSHIFT;
2860 i/= qmul; //FIXME optimize
2861 src[x + y*stride]= i;
2862 }else{
2863 i= -i;
2864 i<<= QEXPSHIFT;
2865 i/= qmul; //FIXME optimize
2866 src[x + y*stride]= -i;
2867 }
2868 }else
2869 src[x + y*stride]= 0;
791e7b83
MN
2870 }
2871 }
2872 }else{
2873 for(y=0; y<h; y++){
2874 for(x=0; x<w; x++){
2875 int i= src[x + y*stride];
2876
da66b631
MN
2877 if((unsigned)(i+thres1) > thres2){
2878 if(i>=0){
2879 i<<= QEXPSHIFT;
2880 i= (i + bias) / qmul; //FIXME optimize
2881 src[x + y*stride]= i;
2882 }else{
2883 i= -i;
2884 i<<= QEXPSHIFT;
2885 i= (i + bias) / qmul; //FIXME optimize
2886 src[x + y*stride]= -i;
2887 }
2888 }else
2889 src[x + y*stride]= 0;
791e7b83
MN
2890 }
2891 }
2892 }
da66b631
MN
2893 if(level+1 == s->spatial_decomposition_count){
2894// STOP_TIMER("quantize")
2895 }
791e7b83
MN
2896}
2897
a0d1931c
Y
2898static void dequantize_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride){
2899 const int w= b->width;
2900 const int h= b->height;
c97de57c
MN
2901 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
2902 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
a0d1931c
Y
2903 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2904 int x,y;
2905 START_TIMER
2906
2907 if(s->qlog == LOSSLESS_QLOG) return;
2908
a0d1931c
Y
2909 for(y=0; y<h; y++){
2910// DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
2911 DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
2912 for(x=0; x<w; x++){
2913 int i= line[x];
2914 if(i<0){
2915 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2916 }else if(i>0){
2917 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
2918 }
2919 }
2920 }
2921 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
2922 STOP_TIMER("dquant")
2923 }
2924}
2925
791e7b83 2926static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
791e7b83
MN
2927 const int w= b->width;
2928 const int h= b->height;
c97de57c
MN
2929 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
2930 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
791e7b83
MN
2931 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2932 int x,y;
ea7d9cd4 2933 START_TIMER
791e7b83 2934
93fbdb5a
MN
2935 if(s->qlog == LOSSLESS_QLOG) return;
2936
791e7b83
MN
2937 for(y=0; y<h; y++){
2938 for(x=0; x<w; x++){
2939 int i= src[x + y*stride];
2940 if(i<0){
2941 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2942 }else if(i>0){
2943 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
2944 }
2945 }
2946 }
ea7d9cd4
MN
2947 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
2948 STOP_TIMER("dquant")
2949 }
791e7b83
MN
2950}
2951
2952static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2953 const int w= b->width;
2954 const int h= b->height;
2955 int x,y;
2956
2957 for(y=h-1; y>=0; y--){
2958 for(x=w-1; x>=0; x--){
2959 int i= x + y*stride;
2960
2961 if(x){
2962 if(use_median){
2963 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
2964 else src[i] -= src[i - 1];
2965 }else{
2966 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
2967 else src[i] -= src[i - 1];
2968 }
2969 }else{
2970 if(y) src[i] -= src[i - stride];
2971 }
2972 }
2973 }
2974}
2975
a0d1931c
Y
2976static void correlate_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2977 const int w= b->width;
2978 const int h= b->height;
2979 int x,y;
2980
2981// START_TIMER
2982
2983 DWTELEM * line;
2984 DWTELEM * prev;
2985
2986 for(y=0; y<h; y++){
2987 prev = line;
2988// line = slice_buffer_get_line_from_address(sb, src + (y * stride));
2989 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
2990 for(x=0; x<w; x++){
2991 if(x){
2992 if(use_median){
2993 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
2994 else line[x] += line[x - 1];
2995 }else{
2996 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
2997 else line[x] += line[x - 1];
2998 }
2999 }else{
3000 if(y) line[x] += prev[x];
3001 }
3002 }
3003 }
3004
3005// STOP_TIMER("correlate")
3006}
3007
791e7b83
MN
3008static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3009 const int w= b->width;
3010 const int h= b->height;
3011 int x,y;
3012
3013 for(y=0; y<h; y++){
3014 for(x=0; x<w; x++){
3015 int i= x + y*stride;
3016
3017 if(x){
3018 if(use_median){
3019 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3020 else src[i] += src[i - 1];
3021 }else{
3022 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3023 else src[i] += src[i - 1];
3024 }
3025 }else{
3026 if(y) src[i] += src[i - stride];
3027 }
3028 }
3029 }
3030}
3031
3032static void encode_header(SnowContext *s){
3033 int plane_index, level, orientation;
28869757
MN
3034 uint8_t kstate[32];
3035
3036 memset(kstate, MID_STATE, sizeof(kstate));
791e7b83 3037
28869757 3038 put_rac(&s->c, kstate, s->keyframe);
19aa028d
MN
3039 if(s->keyframe || s->always_reset)
3040 reset_contexts(s);
791e7b83
MN
3041 if(s->keyframe){
3042 put_symbol(&s->c, s->header_state, s->version, 0);
28869757 3043 put_rac(&s->c, s->header_state, s->always_reset);
791e7b83
MN
3044 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3045 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3046 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3047 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
791e7b83
MN
3048 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3049 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
28869757
MN
3050 put_rac(&s->c, s->header_state, s->spatial_scalability);
3051// put_rac(&s->c, s->header_state, s->rate_scalability);
791e7b83
MN
3052
3053 for(plane_index=0; plane_index<2; plane_index++){
3054 for(level=0; level<s->spatial_decomposition_count; level++){
3055 for(orientation=level ? 1:0; orientation<4; orientation++){
3056 if(orientation==2) continue;
3057 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3058 }
3059 }
3060 }
3061 }
3062 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
3063 put_symbol(&s->c, s->header_state, s->qlog, 1);
3064 put_symbol(&s->c, s->header_state, s->mv_scale, 0);
3065 put_symbol(&s->c, s->header_state, s->qbias, 1);
155ec6ed 3066 put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
791e7b83
MN
3067}
3068
3069static int decode_header(SnowContext *s){
3070 int plane_index, level, orientation;
28869757
MN
3071 uint8_t kstate[32];
3072
3073 memset(kstate, MID_STATE, sizeof(kstate));
791e7b83 3074
28869757 3075 s->keyframe= get_rac(&s->c, kstate);
19aa028d
MN
3076 if(s->keyframe || s->always_reset)
3077 reset_contexts(s);
791e7b83
MN
3078 if(s->keyframe){
3079 s->version= get_symbol(&s->c, s->header_state, 0);
3080 if(s->version>0){
3081 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3082 return -1;
3083 }
28869757 3084 s->always_reset= get_rac(&s->c, s->header_state);
791e7b83
MN
3085 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3086 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3087 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3088 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
791e7b83
MN
3089 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3090 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
28869757
MN
3091 s->spatial_scalability= get_rac(&s->c, s->header_state);
3092// s->rate_scalability= get_rac(&s->c, s->header_state);
791e7b83
MN
3093
3094 for(plane_index=0; plane_index<3; plane_index++){
3095 for(level=0; level<s->spatial_decomposition_count; level++){
3096 for(orientation=level ? 1:0; orientation<4; orientation++){
3097 int q;
3098 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3099 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3100 else q= get_symbol(&s->c, s->header_state, 1);
3101 s->plane[plane_index].band[level][orientation].qlog= q;
3102 }
3103 }
3104 }
3105 }
3106
3107 s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3108 if(s->spatial_decomposition_type > 2){
3109 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3110 return -1;
3111 }
3112
3113 s->qlog= get_symbol(&s->c, s->header_state, 1);
3114 s->mv_scale= get_symbol(&s->c, s->header_state, 0);
3115 s->qbias= get_symbol(&s->c, s->header_state, 1);
155ec6ed 3116 s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
791e7b83
MN
3117
3118 return 0;
3119}
3120
c97de57c
MN
3121static void init_qexp(){
3122 int i;
3123 double v=128;
3124
3125 for(i=0; i<QROOT; i++){
3126 qexp[i]= lrintf(v);
3127 v *= pow(2, 1.0 / QROOT);
3128 }
3129}
3130
791e7b83
MN
3131static int common_init(AVCodecContext *avctx){
3132 SnowContext *s = avctx->priv_data;
3133 int width, height;
3134 int level, orientation, plane_index, dec;
3135
3136 s->avctx= avctx;
3137
3138 dsputil_init(&s->dsp, avctx);
3139
3140#define mcf(dx,dy)\
3141 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3142 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
d92b5807
MN
3143 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3144 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3145 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3146 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
791e7b83
MN
3147
3148 mcf( 0, 0)
3149 mcf( 4, 0)
3150 mcf( 8, 0)
3151 mcf(12, 0)
3152 mcf( 0, 4)
3153 mcf( 4, 4)
3154 mcf( 8, 4)
3155 mcf(12, 4)
3156 mcf( 0, 8)
3157 mcf( 4, 8)
3158 mcf( 8, 8)
3159 mcf(12, 8)
3160 mcf( 0,12)
3161 mcf( 4,12)
3162 mcf( 8,12)
3163 mcf(12,12)
3164
3165#define mcfh(dx,dy)\
3166 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3167 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
d92b5807
MN
3168 mc_block_hpel ## dx ## dy ## 16;\
3169 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3170 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3171 mc_block_hpel ## dx ## dy ## 8;
791e7b83
MN
3172
3173 mcfh(0, 0)
3174 mcfh(8, 0)
3175 mcfh(0, 8)
3176 mcfh(8, 8)
c97de57c
MN
3177
3178 if(!qexp[0])
3179 init_qexp();
3180
791e7b83
MN
3181 dec= s->spatial_decomposition_count= 5;
3182 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3183
3184 s->chroma_h_shift= 1; //FIXME XXX
3185 s->chroma_v_shift= 1;
3186
3187// dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3188
155ec6ed
MN
3189 width= s->avctx->width;
3190 height= s->avctx->height;
3191
3192 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
791e7b83
MN
3193
3194 s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
155ec6ed 3195 s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
791e7b83
MN
3196
3197 for(plane_index=0; plane_index<3; plane_index++){
3198 int w= s->avctx->width;
3199 int h= s->avctx->height;
3200
3201 if(plane_index){
3202 w>>= s->chroma_h_shift;
3203 h>>= s->chroma_v_shift;
3204 }
3205 s->plane[plane_index].width = w;
3206 s->plane[plane_index].height= h;
3bb9f096 3207//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
791e7b83
MN
3208 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3209 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3210 SubBand *b= &s->plane[plane_index].band[level][orientation];
3211
3212 b->buf= s->spatial_dwt_buffer;
3213 b->level= level;
3214 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3215 b->width = (w + !(orientation&1))>>1;
3216 b->height= (h + !(orientation>1))>>1;
3217
a0d1931c
Y
3218 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3219 b->buf_x_offset = 0;
3220 b->buf_y_offset = 0;
3221
3222 if(orientation&1){
3223 b->buf += (w+1)>>1;
3224 b->buf_x_offset = (w+1)>>1;
3225 }
3226 if(orientation>1){
3227 b->buf += b->stride>>1;
3228 b->buf_y_offset = b->stride_line >> 1;
3229 }
791e7b83
MN
3230
3231 if(level)
3232 b->parent= &s->plane[plane_index].band[level-1][orientation];
a0d1931c 3233 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
791e7b83
MN
3234 }
3235 w= (w+1)>>1;
3236 h= (h+1)>>1;
3237 }
3238 }
3239
791e7b83
MN
3240 reset_contexts(s);
3241/*
3242 width= s->width= avctx->width;
3243 height= s->height= avctx->height;
3244
3245 assert(width && height);
3246*/
3247 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3248
3249 return 0;
3250}
3251
3252
3253static void calculate_vissual_weight(SnowContext *s, Plane *p){
3254 int width = p->width;
3255 int height= p->height;
39c61bbb 3256 int level, orientation, x, y;
791e7b83
MN
3257
3258 for(level=0; level<s->spatial_decomposition_count; level++){
3259 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3260 SubBand *b= &p->band[level][orientation];
3261 DWTELEM *buf= b->buf;
3262 int64_t error=0;
3263
3264 memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
3265 buf[b->width/2 + b->height/2*b->stride]= 256*256;
46c281e8 3266 ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
791e7b83
MN
3267 for(y=0; y<height; y++){
3268 for(x=0; x<width; x++){
3269 int64_t d= s->spatial_dwt_buffer[x + y*width];
3270 error += d*d;
3271 }
3272 }
3273
3274 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
46c281e8 3275// av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
791e7b83
MN
3276 }
3277 }
3278}
3279
3280static int encode_init(AVCodecContext *avctx)
3281{
3282 SnowContext *s = avctx->priv_data;
39c61bbb 3283 int plane_index;
791e7b83 3284
2ff9ff5b
MN
3285 if(avctx->strict_std_compliance >= 0){
3286 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it wont be decodeable with future versions!!!\n"
53513831 3287 "use vstrict=-1 / -strict -1 to use it anyway\n");
2ff9ff5b
MN
3288 return -1;
3289 }
3290
791e7b83 3291 common_init(avctx);
155ec6ed 3292 alloc_blocks(s);
791e7b83
MN
3293
3294 s->version=0;
3295
3296 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3297 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3298 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
791e7b83
MN
3299 h263_encode_init(&s->m); //mv_penalty
3300
3301 for(plane_index=0; plane_index<3; plane_index++){
3302 calculate_vissual_weight(s, &s->plane[plane_index]);
3303 }
3304
3305
3306 avctx->coded_frame= &s->current_picture;
3307 switch(avctx->pix_fmt){
3308// case PIX_FMT_YUV444P:
3309// case PIX_FMT_YUV422P:
3310 case PIX_FMT_YUV420P:
3311 case PIX_FMT_GRAY8:
3312// case PIX_FMT_YUV411P:
3313// case PIX_FMT_YUV410P:
3314 s->colorspace_type= 0;
3315 break;
3316/* case PIX_FMT_RGBA32:
3317 s->colorspace= 1;
3318 break;*/
3319 default:
3320 av_log(avctx, AV_LOG_ERROR, "format not supported\n");
3321 return -1;
3322 }
3323// avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
3324 s->chroma_h_shift= 1;
3325 s->chroma_v_shift= 1;
3326 return 0;
3327}
3328
3329static int frame_start(SnowContext *s){
3330 AVFrame tmp;
64886072
MN
3331 int w= s->avctx->width; //FIXME round up to x16 ?
3332 int h= s->avctx->height;
791e7b83 3333
64886072
MN
3334 if(s->current_picture.data[0]){
3335 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
3336 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
3337 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
3338 }
3339
791e7b83
MN
3340 tmp= s->last_picture;
3341 s->last_picture= s->current_picture;
3342 s->current_picture= tmp;
3343
3344 s->current_picture.reference= 1;
3345 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
3346 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
3347 return -1;
3348 }
3349
3350 return 0;
3351}
3352
3353static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
3354 SnowContext *s = avctx->priv_data;
28869757 3355 RangeCoder * const c= &s->c;
791e7b83
MN
3356 AVFrame *pict = data;
3357 const int width= s->avctx->width;
3358 const int height= s->avctx->height;
39c61bbb 3359 int level, orientation, plane_index;
791e7b83 3360
28869757
MN
3361 ff_init_range_encoder(c, buf, buf_size);
3362 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
791e7b83
MN
3363
3364 s->input_picture = *pict;
3365
791e7b83
MN
3366 s->keyframe=avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
3367 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
3368
93fbdb5a
MN
3369 if(pict->quality){
3370 s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
3371 //<64 >60
c97de57c 3372 s->qlog += 61*QROOT/8;
93fbdb5a
MN
3373 }else{
3374 s->qlog= LOSSLESS_QLOG;
3375 }
791e7b83 3376
791e7b83 3377 frame_start(s);
19aa028d 3378 s->current_picture.key_frame= s->keyframe;
791e7b83
MN
3379
3380 if(pict->pict_type == P_TYPE){
3381 int block_width = (width +15)>>4;
3382 int block_height= (height+15)>>4;
3383 int stride= s->current_picture.linesize[0];
791e7b83
MN
3384
3385 assert(s->current_picture.data[0]);
3386 assert(s->last_picture.data[0]);
3387
3388 s->m.avctx= s->avctx;
3389 s->m.current_picture.data[0]= s->current_picture.data[0];
3390 s->m. last_picture.data[0]= s-> last_picture.data[0];
3391 s->m. new_picture.data[0]= s-> input_picture.data[0];
3392 s->m.current_picture_ptr= &s->m.current_picture;
3393 s->m. last_picture_ptr= &s->m. last_picture;
3394 s->m.linesize=
3395 s->m. last_picture.linesize[0]=
3396 s->m. new_picture.linesize[0]=
3397 s->m.current_picture.linesize[0]= stride;
155ec6ed 3398 s->m.uvlinesize= s->current_picture.linesize[1];
791e7b83
MN
3399 s->m.width = width;
3400 s->m.height= height;
3401 s->m.mb_width = block_width;
3402 s->m.mb_height= block_height;
3403 s->m.mb_stride= s->m.mb_width+1;
3404 s->m.b8_stride= 2*s->m.mb_width+1;
3405 s->m.f_code=1;
3406 s->m.pict_type= pict->pict_type;
3407 s->m.me_method= s->avctx->me_method;
3408 s->m.me.scene_change_score=0;
3409 s->m.flags= s->avctx->flags;
3410 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
3411 s->m.out_format= FMT_H263;
3412 s->m.unrestricted_mv= 1;
3413
155ec6ed 3414 s->lambda = s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
791e7b83 3415 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
155ec6ed 3416 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
791e7b83 3417
791e7b83
MN
3418 s->m.dsp= s->dsp; //move
3419 ff_init_me(&s->m);
791e7b83 3420 }
791e7b83 3421
155ec6ed
MN
3422redo_frame:
3423
791e7b83
MN
3424 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
3425
3426 encode_header(s);
155ec6ed
MN
3427 encode_blocks(s);
3428
791e7b83
MN
3429 for(plane_index=0; plane_index<3; plane_index++){
3430 Plane *p= &s->plane[plane_index];
3431 int w= p->width;
3432 int h= p->height;
3433 int x, y;
39c61bbb 3434// int bits= put_bits_count(&s->c.pb);
791e7b83
MN
3435
3436 //FIXME optimize
791e7b83
MN
3437 if(pict->data[plane_index]) //FIXME gray hack
3438 for(y=0; y<h; y++){
3439 for(x=0; x<w; x++){
034aff03 3440 s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
791e7b83
MN
3441 }
3442 }
3443 predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
155ec6ed
MN
3444
3445 if( plane_index==0
3446 && pict->pict_type == P_TYPE
3447 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
28869757
MN
3448 ff_init_range_encoder(c, buf, buf_size);
3449 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
155ec6ed
MN
3450 pict->pict_type= FF_I_TYPE;
3451 s->keyframe=1;
3452 reset_contexts(s);
3453 goto redo_frame;
3454 }
3455
93fbdb5a
MN
3456 if(s->qlog == LOSSLESS_QLOG){
3457 for(y=0; y<h; y++){
3458 for(x=0; x<w; x++){
3cff4572 3459 s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
93fbdb5a
MN
3460 }
3461 }
3462 }
791e7b83 3463
46c281e8 3464 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
93fbdb5a 3465
791e7b83
MN
3466 for(level=0; level<s->spatial_decomposition_count; level++){
3467 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3468 SubBand *b= &p->band[level][orientation];
3469
3470 quantize(s, b, b->buf, b->stride, s->qbias);
3471 if(orientation==0)
3472 decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
3473 encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
3474 assert(b->parent==NULL || b->parent->stride == b->stride*2);
3475 if(orientation==0)
3476 correlate(s, b, b->buf, b->stride, 1, 0);
3477 }
3478 }
3479// av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
3480
3481 for(level=0; level<s->spatial_decomposition_count; level++){
3482 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3483 SubBand *b= &p->band[level][orientation];
3484
3485 dequantize(s, b, b->buf, b->stride);
3486 }
3487 }
93fbdb5a 3488
46c281e8 3489 ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
93fbdb5a
MN
3490 if(s->qlog == LOSSLESS_QLOG){
3491 for(y=0; y<h; y++){
3492 for(x=0; x<w; x++){
034aff03 3493 s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
93fbdb5a
MN
3494 }
3495 }
3496 }
715a97f0 3497{START_TIMER
791e7b83 3498 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
715a97f0 3499STOP_TIMER("pred-conv")}
791e7b83
MN
3500 if(s->avctx->flags&CODEC_FLAG_PSNR){
3501 int64_t error= 0;
3502
3503 if(pict->data[plane_index]) //FIXME gray hack
3504 for(y=0; y<h; y++){
3505 for(x=0; x<w; x++){
93fbdb5a 3506 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
791e7b83
MN
3507 error += d*d;
3508 }
3509 }
791e7b83 3510 s->avctx->error[plane_index] += error;
bd368b56 3511 s->current_picture.error[plane_index] = error;
791e7b83
MN
3512 }
3513 }
3514
3515 if(s->last_picture.data[0])
3516 avctx->release_buffer(avctx, &s->last_picture);
3517
3518 emms_c();
3519
28869757 3520 return ff_rac_terminate(c);
791e7b83
MN
3521}
3522
3523static void common_end(SnowContext *s){
7b49c309
MN
3524 int plane_index, level, orientation;
3525
791e7b83 3526 av_freep(&s->spatial_dwt_buffer);
791e7b83
MN
3527
3528 av_freep(&s->m.me.scratchpad);
3529 av_freep(&s->m.me.map);
3530 av_freep(&s->m.me.score_map);
155ec6ed
MN
3531
3532 av_freep(&s->block);
7b49c309
MN
3533
3534 for(plane_index=0; plane_index<3; plane_index++){
3535 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3536 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3537 SubBand *b= &s->plane[plane_index].band[level][orientation];
3538
a0d1931c 3539 av_freep(&b->x_coeff);
7b49c309
MN
3540 }
3541 }
3542 }
791e7b83
MN
3543}
3544
3545static int encode_end(AVCodecContext *avctx)
3546{
3547 SnowContext *s = avctx->priv_data;
3548
3549 common_end(s);
3550
3551 return 0;
3552}
3553
3554static int decode_init(AVCodecContext *avctx)
3555{
a0d1931c
Y
3556 SnowContext *s = avctx->priv_data;
3557 int block_size;
791e7b83
MN
3558
3559 common_init(avctx);
3560
a0d1931c
Y
3561 block_size = MB_SIZE >> s->block_max_depth;
3562 /* FIXME block_size * 2 is determined empirically. block_size * 1.5 is definitely needed, but I (Robert) cannot figure out why more than that is needed. Perhaps there is a bug, or perhaps I overlooked some demands that are placed on the buffer. */
3563 /* FIXME The formula is WRONG. For height > 480, the buffer will overflow. */
3564 /* FIXME For now, I will use a full frame of lines. Fortunately, this should not materially effect cache performance because lines are allocated using a stack, so if in fact only 50 out of 496 lines are needed at a time, the other 446 will sit allocated but never accessed. */
3565// slice_buffer_init(s->plane[0].sb, s->plane[0].height, (block_size * 2) + (s->spatial_decomposition_count * s->spatial_decomposition_count), s->plane[0].width, s->spatial_dwt_buffer);
3566 slice_buffer_init(&s->sb, s->plane[0].height, s->plane[0].height, s->plane[0].width, s->spatial_dwt_buffer);
3567
791e7b83
MN
3568 return 0;
3569}
3570
3571static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
3572 SnowContext *s = avctx->priv_data;
28869757 3573 RangeCoder * const c= &s->c;
791e7b83
MN
3574 int bytes_read;
3575 AVFrame *picture = data;
39c61bbb 3576 int level, orientation, plane_index;
791e7b83 3577
28869757
MN
3578 ff_init_range_decoder(c, buf, buf_size);
3579 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
791e7b83 3580
791e7b83
MN
3581 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
3582 decode_header(s);
155ec6ed 3583 if(!s->block) alloc_blocks(s);
791e7b83
MN
3584
3585 frame_start(s);
3586 //keyframe flag dupliaction mess FIXME
3587 if(avctx->debug&FF_DEBUG_PICT_INFO)
3588 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
3589
155ec6ed 3590 decode_blocks(s);
791e7b83
MN
3591
3592 for(plane_index=0; plane_index<3; plane_index++){
3593 Plane *p= &s->plane[plane_index];
3594 int w= p->width;
3595 int h= p->height;
3596 int x, y;
a0d1931c
Y
3597 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
3598 SubBand * correlate_band;
791e7b83
MN
3599
3600if(s->avctx->debug&2048){
3601 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
3602 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3603
3604 for(y=0; y<h; y++){
3605 for(x=0; x<w; x++){
715a97f0 3606 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
791e7b83
MN
3607 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
3608 }
3609 }
3610}
791e7b83 3611
a0d1931c
Y
3612{ START_TIMER
3613 for(level=0; level<s->spatial_decomposition_count; level++){
3614 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3615 SubBand *b= &p->band[level][orientation];
3616 unpack_coeffs(s, b, b->parent, orientation);
791e7b83 3617 }
a0d1931c
Y
3618 }
3619 STOP_TIMER("unpack coeffs");
3620}
3621
3622 /* Handle level 0, orientation 0 specially. It is particularly resistant to slicing but fortunately quite small, so process it in one pass. */
3623 correlate_band = &p->band[0][0];
3624 decode_subband_slice_buffered(s, correlate_band, &s->sb, 0, correlate_band->height, decode_state[0][0]);
3625 correlate_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0);
3626 dequantize_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride);
791e7b83 3627
f9e6ebf7
LM
3628{START_TIMER
3629 const int mb_h= s->b_height << s->block_max_depth;
3630 const int block_size = MB_SIZE >> s->block_max_depth;
3631 const int block_w = plane_index ? block_size/2 : block_size;
3632 int mb_y;
3633 dwt_compose_t cs[MAX_DECOMPOSITIONS];
3634 int yd=0, yq=0;
a0d1931c
Y
3635 int y;
3636 int end_y;
f9e6ebf7 3637
a0d1931c 3638 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
f9e6ebf7 3639 for(mb_y=0; mb_y<=mb_h; mb_y++){
a0d1931c
Y
3640
3641 const int slice_starty = block_w*mb_y;
f9e6ebf7 3642 const int slice_h = block_w*(mb_y+1);
f9e6ebf7 3643
a0d1931c
Y
3644 {
3645 START_TIMER
3646 for(level=0; level<s->spatial_decomposition_count; level++){
3647 for(orientation=level ? 1 : 1; orientation<4; orientation++){
3648 SubBand *b= &p->band[level][orientation];
3649 int start_y;
3650 int end_y;
3651 int our_mb_start = mb_y;
3652 int our_mb_end = (mb_y + 1);
3653 start_y = FFMIN(b->height, (mb_y ? ((block_w * our_mb_start - 4) >> (s->spatial_decomposition_count - level)) + 5 : 0));
3654 end_y = FFMIN(b->height, (((block_w * our_mb_end - 4) >> (s->spatial_decomposition_count - level)) + 5));
3655
3656 if (start_y != end_y)
3657 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
3658 }
3659 }
3660 STOP_TIMER("decode_subband_slice");
3661 }
3662
3663{ START_TIMER
3664 for(; yd<slice_h; yd+=4){
3665 ff_spatial_idwt_buffered_slice(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
3666 }
3667 STOP_TIMER("idwt slice");}
3668
3669
93fbdb5a 3670 if(s->qlog == LOSSLESS_QLOG){
f9e6ebf7 3671 for(; yq<slice_h && yq<h; yq++){
a0d1931c 3672 DWTELEM * line = slice_buffer_get_line(&s->sb, yq);
93fbdb5a 3673 for(x=0; x<w; x++){
a0d1931c 3674 line[x] <<= FRAC_BITS;
93fbdb5a
MN
3675 }
3676 }
3677 }
f9e6ebf7 3678
a0d1931c
Y
3679 predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y);
3680
3681 /* Nasty hack based empirically on how predict_slice_buffered() hits the buffer. */
3682 /* FIXME If possible, make predict_slice fit into the slice. As of now, it works on some previous lines (up to slice_height / 2) if the condition on the next line is false. */
3683 if (s->keyframe || (s->avctx->debug&512)){
3684 y = FFMIN(p->height, slice_starty);
3685 end_y = FFMIN(p->height, slice_h);
3686 }
3687 else{
3688 y = FFMAX(0, FFMIN(p->height, slice_starty - (block_w >> 1)));
3689 end_y = FFMAX(0, FFMIN(p->height, slice_h - (block_w >> 1)));
3690 }
3691 while(y < end_y)
3692 slice_buffer_release(&s->sb, y++);
f9e6ebf7 3693 }
a0d1931c
Y
3694
3695 slice_buffer_flush(&s->sb);
3696
f9e6ebf7 3697STOP_TIMER("idwt + predict_slices")}
791e7b83
MN
3698 }