malloc padding to avoid reading past the malloc()ed area.
[libav.git] / libavcodec / snow.c
CommitLineData
791e7b83
MN
1/*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#include "avcodec.h"
20#include "common.h"
21#include "dsputil.h"
28869757
MN
22
23#include "rangecoder.h"
24#define MID_STATE 128
791e7b83
MN
25
26#include "mpegvideo.h"
27
28#undef NDEBUG
29#include <assert.h>
30
31#define MAX_DECOMPOSITIONS 8
32#define MAX_PLANES 4
33#define DWTELEM int
a0a74ad9 34#define QSHIFT 5
c97de57c 35#define QROOT (1<<QSHIFT)
93fbdb5a 36#define LOSSLESS_QLOG -128
034aff03 37#define FRAC_BITS 8
791e7b83
MN
38
39static const int8_t quant3[256]={
40 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
47 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
48-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
53-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
54-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
55-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
56};
57static const int8_t quant3b[256]={
58 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
64 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
65 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
66-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
71-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
72-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
73-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
74};
538a3841
MN
75static const int8_t quant3bA[256]={
76 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
87 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
88 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
89 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
90 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
91 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
92};
791e7b83
MN
93static const int8_t quant5[256]={
94 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
97 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
98 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
99 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
100 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
101 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
102-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
104-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
106-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
107-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
108-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
109-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
110};
111static const int8_t quant7[256]={
112 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
113 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
114 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
115 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
116 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
117 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
118 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
119 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
120-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
121-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
122-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
123-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
124-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
125-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
126-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
127-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
128};
129static const int8_t quant9[256]={
130 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
131 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
133 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
134 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
135 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
136 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
137 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
138-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
139-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
140-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
142-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
143-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
144-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
145-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
146};
147static const int8_t quant11[256]={
148 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
149 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
150 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
151 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
152 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
153 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
154 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
155 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
156-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
157-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
159-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
160-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
161-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
162-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
163-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
164};
165static const int8_t quant13[256]={
166 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
167 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
168 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
169 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
170 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
171 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
172 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
173 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
174-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
175-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
176-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
177-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
178-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
179-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
180-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
181-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
182};
183
715a97f0
MN
184#define LOG2_OBMC_MAX 6
185#define OBMC_MAX (1<<(LOG2_OBMC_MAX))
791e7b83
MN
186#if 0 //64*cubic
187static const uint8_t obmc32[1024]={
188 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
190 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
191 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
192 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
193 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
194 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
195 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
196 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
197 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
198 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
199 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
200 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
201 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
202 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
203 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
204 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
205 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
206 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
207 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
208 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
209 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
210 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
211 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
212 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
213 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
214 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
215 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
216 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
217 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
218 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
219 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
220//error:0.000022
221};
222static const uint8_t obmc16[256]={
223 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
224 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
225 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
226 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
227 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
228 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
229 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
230 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
231 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
232 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
233 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
234 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
235 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
236 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
237 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
238 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
239//error:0.000033
240};
241#elif 1 // 64*linear
242static const uint8_t obmc32[1024]={
243 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
244 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
245 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
246 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
247 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
248 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
249 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
250 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
251 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
252 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
253 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
254 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
255 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
256 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
257 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
258 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
259 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
260 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
261 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
262 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
263 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
264 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
265 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
266 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
267 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
268 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
269 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
270 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
271 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
272 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
273 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
274 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
275 //error:0.000020
276};
277static const uint8_t obmc16[256]={
278 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
279 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
280 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
281 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
282 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
283 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
284 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
285 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
286 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
287 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
288 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
289 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
290 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
291 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
292 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
293 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
294//error:0.000015
295};
296#else //64*cos
297static const uint8_t obmc32[1024]={
298 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
299 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
300 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
301 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
302 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
303 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
304 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
305 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
306 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
307 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
308 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
309 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
310 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
311 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
312 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
313 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
314 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
315 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
316 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
317 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
318 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
319 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
320 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
321 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
322 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
323 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
324 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
325 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
326 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
327 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
328 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
329 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
330//error:0.000022
331};
332static const uint8_t obmc16[256]={
333 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
334 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
335 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
336 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
337 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
338 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
339 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
340 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
341 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
342 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
343 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
344 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
345 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
346 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
347 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
348 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
349//error:0.000022
350};
351#endif
352
155ec6ed
MN
353//linear *64
354static const uint8_t obmc8[64]={
355 1, 3, 5, 7, 7, 5, 3, 1,
356 3, 9,15,21,21,15, 9, 3,
357 5,15,25,35,35,25,15, 5,
358 7,21,35,49,49,35,21, 7,
359 7,21,35,49,49,35,21, 7,
360 5,15,25,35,35,25,15, 5,
361 3, 9,15,21,21,15, 9, 3,
362 1, 3, 5, 7, 7, 5, 3, 1,
363//error:0.000000
364};
365
366//linear *64
367static const uint8_t obmc4[16]={
368 4,12,12, 4,
36912,36,36,12,
37012,36,36,12,
371 4,12,12, 4,
372//error:0.000000
373};
374
375static const uint8_t *obmc_tab[4]={
376 obmc32, obmc16, obmc8, obmc4
377};
378
379typedef struct BlockNode{
380 int16_t mx;
381 int16_t my;
382 uint8_t color[3];
383 uint8_t type;
384//#define TYPE_SPLIT 1
385#define BLOCK_INTRA 1
386//#define TYPE_NOCOLOR 4
387 uint8_t level; //FIXME merge into type?
388}BlockNode;
389
390#define LOG2_MB_SIZE 4
391#define MB_SIZE (1<<LOG2_MB_SIZE)
392
a0d1931c
Y
393typedef struct x_and_coeff{
394 int16_t x;
538a3841 395 uint16_t coeff;
a0d1931c
Y
396} x_and_coeff;
397
791e7b83
MN
398typedef struct SubBand{
399 int level;
400 int stride;
401 int width;
402 int height;
403 int qlog; ///< log(qscale)/log[2^(1/6)]
404 DWTELEM *buf;
a0d1931c
Y
405 int buf_x_offset;
406 int buf_y_offset;
407 int stride_line; ///< Stride measured in lines, not pixels.
408 x_and_coeff * x_coeff;
791e7b83
MN
409 struct SubBand *parent;
410 uint8_t state[/*7*2*/ 7 + 512][32];
411}SubBand;
412
413typedef struct Plane{
414 int width;
415 int height;
416 SubBand band[MAX_DECOMPOSITIONS][4];
417}Plane;
418
a0d1931c
Y
419/** Used to minimize the amount of memory used in order to optimize cache performance. **/
420typedef struct {
421 DWTELEM * * line; ///< For use by idwt and predict_slices.
422 DWTELEM * * data_stack; ///< Used for internal purposes.
423 int data_stack_top;
424 int line_count;
425 int line_width;
426 int data_count;
427 DWTELEM * base_buffer; ///< Buffer that this structure is caching.
428} slice_buffer;
429
791e7b83
MN
430typedef struct SnowContext{
431// MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
432
433 AVCodecContext *avctx;
28869757 434 RangeCoder c;
791e7b83
MN
435 DSPContext dsp;
436 AVFrame input_picture;
437 AVFrame current_picture;
438 AVFrame last_picture;
439 AVFrame mconly_picture;
440// uint8_t q_context[16];
441 uint8_t header_state[32];
155ec6ed 442 uint8_t block_state[128 + 32*128];
791e7b83 443 int keyframe;
19aa028d 444 int always_reset;
791e7b83
MN
445 int version;
446 int spatial_decomposition_type;
447 int temporal_decomposition_type;
448 int spatial_decomposition_count;
449 int temporal_decomposition_count;
450 DWTELEM *spatial_dwt_buffer;
791e7b83
MN
451 int colorspace_type;
452 int chroma_h_shift;
453 int chroma_v_shift;
454 int spatial_scalability;
455 int qlog;
155ec6ed
MN
456 int lambda;
457 int lambda2;
791e7b83
MN
458 int mv_scale;
459 int qbias;
460#define QBIAS_SHIFT 3
155ec6ed
MN
461 int b_width;
462 int b_height;
463 int block_max_depth;
791e7b83 464 Plane plane[MAX_PLANES];
155ec6ed 465 BlockNode *block;
a0d1931c 466 slice_buffer sb;
155ec6ed 467
791e7b83
MN
468 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
469}SnowContext;
470
f9e6ebf7
LM
471typedef struct {
472 DWTELEM *b0;
473 DWTELEM *b1;
474 DWTELEM *b2;
475 DWTELEM *b3;
476 int y;
477} dwt_compose_t;
478
a0d1931c
Y
479#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
480//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
481
482static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
483{
484 int i;
485
486 buf->base_buffer = base_buffer;
487 buf->line_count = line_count;
488 buf->line_width = line_width;
489 buf->data_count = max_allocated_lines;
490 buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
491 buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
492
493 for (i = 0; i < max_allocated_lines; i++)
494 {
495 buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
496 }
497
498 buf->data_stack_top = max_allocated_lines - 1;
499}
500
501static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
502{
a0d1931c
Y
503 int offset;
504 DWTELEM * buffer;
505
506// av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
507
508 assert(buf->data_stack_top >= 0);
509// assert(!buf->line[line]);
510 if (buf->line[line])
511 return buf->line[line];
512
513 offset = buf->line_width * line;
514 buffer = buf->data_stack[buf->data_stack_top];
515 buf->data_stack_top--;
516 buf->line[line] = buffer;
517
518// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
519
520 return buffer;
521}
522
523static void slice_buffer_release(slice_buffer * buf, int line)
524{
a0d1931c
Y
525 int offset;
526 DWTELEM * buffer;
527
528 assert(line >= 0 && line < buf->line_count);
529 assert(buf->line[line]);
530
531 offset = buf->line_width * line;
532 buffer = buf->line[line];
533 buf->data_stack_top++;
534 buf->data_stack[buf->data_stack_top] = buffer;
535 buf->line[line] = NULL;
536
537// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
538}
539
540static void slice_buffer_flush(slice_buffer * buf)
541{
542 int i;
543 for (i = 0; i < buf->line_count; i++)
544 {
545 if (buf->line[i])
546 {
547// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
548 slice_buffer_release(buf, i);
549 }
550 }
551}
552
553static void slice_buffer_destroy(slice_buffer * buf)
554{
555 int i;
556 slice_buffer_flush(buf);
557
558 for (i = buf->data_count - 1; i >= 0; i--)
559 {
560 assert(buf->data_stack[i]);
561 av_free(buf->data_stack[i]);
562 }
563 assert(buf->data_stack);
564 av_free(buf->data_stack);
565 assert(buf->line);
566 av_free(buf->line);
567}
568
2554db9b
MN
569#ifdef __sgi
570// Avoid a name clash on SGI IRIX
571#undef qexp
572#endif
034aff03 573#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
c97de57c 574static uint8_t qexp[QROOT];
791e7b83
MN
575
576static inline int mirror(int v, int m){
577 if (v<0) return -v;
578 else if(v>m) return 2*m-v;
579 else return v;
580}
581
28869757 582static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
791e7b83
MN
583 int i;
584
585 if(v){
586 const int a= ABS(v);
587 const int e= av_log2(a);
588#if 1
589 const int el= FFMIN(e, 10);
28869757 590 put_rac(c, state+0, 0);
791e7b83
MN
591
592 for(i=0; i<el; i++){
28869757 593 put_rac(c, state+1+i, 1); //1..10
791e7b83
MN
594 }
595 for(; i<e; i++){
28869757 596 put_rac(c, state+1+9, 1); //1..10
791e7b83 597 }
28869757 598 put_rac(c, state+1+FFMIN(i,9), 0);
791e7b83
MN
599
600 for(i=e-1; i>=el; i--){
28869757 601 put_rac(c, state+22+9, (a>>i)&1); //22..31
791e7b83
MN
602 }
603 for(; i>=0; i--){
28869757 604 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
605 }
606
607 if(is_signed)
28869757 608 put_rac(c, state+11 + el, v < 0); //11..21
791e7b83
MN
609#else
610
28869757 611 put_rac(c, state+0, 0);
791e7b83
MN
612 if(e<=9){
613 for(i=0; i<e; i++){
28869757 614 put_rac(c, state+1+i, 1); //1..10
791e7b83 615 }
28869757 616 put_rac(c, state+1+i, 0);
791e7b83
MN
617
618 for(i=e-1; i>=0; i--){
28869757 619 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
620 }
621
622 if(is_signed)
28869757 623 put_rac(c, state+11 + e, v < 0); //11..21
791e7b83
MN
624 }else{
625 for(i=0; i<e; i++){
28869757 626 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
791e7b83 627 }
28869757 628 put_rac(c, state+1+FFMIN(i,9), 0);
791e7b83
MN
629
630 for(i=e-1; i>=0; i--){
28869757 631 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
791e7b83
MN
632 }
633
634 if(is_signed)
28869757 635 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
791e7b83
MN
636 }
637#endif
638 }else{
28869757 639 put_rac(c, state+0, 1);
791e7b83
MN
640 }
641}
642
28869757
MN
643static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
644 if(get_rac(c, state+0))
791e7b83
MN
645 return 0;
646 else{
7c2425d2
LM
647 int i, e, a;
648 e= 0;
28869757 649 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
7c2425d2 650 e++;
791e7b83 651 }
7c2425d2 652
791e7b83 653 a= 1;
7c2425d2 654 for(i=e-1; i>=0; i--){
28869757 655 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
791e7b83
MN
656 }
657
28869757 658 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
791e7b83
MN
659 return -a;
660 else
661 return a;
662 }
663}
664
28869757 665static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
4f4e9633 666 int i;
0635cbfc 667 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
668
669 assert(v>=0);
0635cbfc
MN
670 assert(log2>=-4);
671
672 while(v >= r){
28869757 673 put_rac(c, state+4+log2, 1);
0635cbfc 674 v -= r;
4f4e9633 675 log2++;
0635cbfc 676 if(log2>0) r+=r;
4f4e9633 677 }
28869757 678 put_rac(c, state+4+log2, 0);
4f4e9633
MN
679
680 for(i=log2-1; i>=0; i--){
28869757 681 put_rac(c, state+31-i, (v>>i)&1);
4f4e9633 682 }
4f4e9633
MN
683}
684
28869757 685static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
4f4e9633 686 int i;
0635cbfc 687 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
688 int v=0;
689
0635cbfc
MN
690 assert(log2>=-4);
691
28869757 692 while(get_rac(c, state+4+log2)){
0635cbfc 693 v+= r;
4f4e9633 694 log2++;
0635cbfc 695 if(log2>0) r+=r;
4f4e9633
MN
696 }
697
698 for(i=log2-1; i>=0; i--){
28869757 699 v+= get_rac(c, state+31-i)<<i;
4f4e9633
MN
700 }
701
702 return v;
703}
704
791e7b83
MN
705static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
706 const int mirror_left= !highpass;
707 const int mirror_right= (width&1) ^ highpass;
708 const int w= (width>>1) - 1 + (highpass & width);
709 int i;
710
711#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
712 if(mirror_left){
713 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
714 dst += dst_step;
715 src += src_step;
716 }
717
718 for(i=0; i<w; i++){
719 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
720 }
721
722 if(mirror_right){
723 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
724 }
725}
726
727static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
728 const int mirror_left= !highpass;
729 const int mirror_right= (width&1) ^ highpass;
730 const int w= (width>>1) - 1 + (highpass & width);
731 int i;
732
733 if(mirror_left){
734 int r= 3*2*ref[0];
735 r += r>>4;
736 r += r>>8;
737 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
738 dst += dst_step;
739 src += src_step;
740 }
741
742 for(i=0; i<w; i++){
743 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
744 r += r>>4;
745 r += r>>8;
746 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
747 }
748
749 if(mirror_right){
750 int r= 3*2*ref[w*ref_step];
751 r += r>>4;
752 r += r>>8;
753 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
754 }
755}
756
f5a71928
MN
757static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
758 const int mirror_left= !highpass;
759 const int mirror_right= (width&1) ^ highpass;
760 const int w= (width>>1) - 1 + (highpass & width);
761 int i;
762
763 assert(shift == 4);
764#define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
765 if(mirror_left){
766 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
767 dst += dst_step;
768 src += src_step;
769 }
770
771 for(i=0; i<w; i++){
772 dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
773 }
774
775 if(mirror_right){
776 dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
777 }
778}
779
791e7b83 780
aa25a462 781static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
791e7b83
MN
782 int x, i;
783
784 for(x=start; x<width; x+=2){
785 int64_t sum=0;
786
787 for(i=0; i<n; i++){
788 int x2= x + 2*i - n + 1;
789 if (x2< 0) x2= -x2;
790 else if(x2>=width) x2= 2*width-x2-2;
791 sum += coeffs[i]*(int64_t)dst[x2];
792 }
793 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
794 else dst[x] += (sum + (1<<shift)/2)>>shift;
795 }
796}
797
aa25a462 798static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
791e7b83
MN
799 int x, y, i;
800 for(y=start; y<height; y+=2){
801 for(x=0; x<width; x++){
802 int64_t sum=0;
803
804 for(i=0; i<n; i++){
805 int y2= y + 2*i - n + 1;
806 if (y2< 0) y2= -y2;
807 else if(y2>=height) y2= 2*height-y2-2;
808 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
809 }
810 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
811 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
812 }
813 }
814}
815
816#define SCALEX 1
817#define LX0 0
818#define LX1 1
819
de890c9b 820#if 0 // more accurate 9/7
791e7b83
MN
821#define N1 2
822#define SHIFT1 14
823#define COEFFS1 (int[]){-25987,-25987}
824#define N2 2
825#define SHIFT2 19
826#define COEFFS2 (int[]){-27777,-27777}
827#define N3 2
828#define SHIFT3 15
829#define COEFFS3 (int[]){28931,28931}
830#define N4 2
831#define SHIFT4 15
832#define COEFFS4 (int[]){14533,14533}
833#elif 1 // 13/7 CRF
834#define N1 4
835#define SHIFT1 4
836#define COEFFS1 (int[]){1,-9,-9,1}
de890c9b 837#define N2 4
791e7b83
MN
838#define SHIFT2 4
839#define COEFFS2 (int[]){-1,5,5,-1}
840#define N3 0
841#define SHIFT3 1
842#define COEFFS3 NULL
843#define N4 0
844#define SHIFT4 1
845#define COEFFS4 NULL
846#elif 1 // 3/5
847#define LX0 1
848#define LX1 0
849#define SCALEX 0.5
850#define N1 2
851#define SHIFT1 1
852#define COEFFS1 (int[]){1,1}
853#define N2 2
854#define SHIFT2 2
855#define COEFFS2 (int[]){-1,-1}
856#define N3 0
857#define SHIFT3 0
858#define COEFFS3 NULL
859#define N4 0
860#define SHIFT4 0
861#define COEFFS4 NULL
862#elif 1 // 11/5
863#define N1 0
864#define SHIFT1 1
865#define COEFFS1 NULL
866#define N2 2
867#define SHIFT2 2
868#define COEFFS2 (int[]){-1,-1}
869#define N3 2
870#define SHIFT3 0
871#define COEFFS3 (int[]){-1,-1}
872#define N4 4
873#define SHIFT4 7
874#define COEFFS4 (int[]){-5,29,29,-5}
875#define SCALEX 4
876#elif 1 // 9/7 CDF
877#define N1 2
878#define SHIFT1 7
879#define COEFFS1 (int[]){-203,-203}
880#define N2 2
881#define SHIFT2 12
882#define COEFFS2 (int[]){-217,-217}
883#define N3 2
884#define SHIFT3 7
885#define COEFFS3 (int[]){113,113}
886#define N4 2
887#define SHIFT4 9
888#define COEFFS4 (int[]){227,227}
889#define SCALEX 1
890#elif 1 // 7/5 CDF
891#define N1 0
892#define SHIFT1 1
893#define COEFFS1 NULL
894#define N2 2
895#define SHIFT2 2
896#define COEFFS2 (int[]){-1,-1}
897#define N3 2
898#define SHIFT3 0
899#define COEFFS3 (int[]){-1,-1}
900#define N4 2
901#define SHIFT4 4
902#define COEFFS4 (int[]){3,3}
903#elif 1 // 9/7 MN
904#define N1 4
905#define SHIFT1 4
906#define COEFFS1 (int[]){1,-9,-9,1}
907#define N2 2
908#define SHIFT2 2
909#define COEFFS2 (int[]){1,1}
910#define N3 0
911#define SHIFT3 1
912#define COEFFS3 NULL
913#define N4 0
914#define SHIFT4 1
915#define COEFFS4 NULL
916#else // 13/7 CRF
917#define N1 4
918#define SHIFT1 4
919#define COEFFS1 (int[]){1,-9,-9,1}
920#define N2 4
921#define SHIFT2 4
922#define COEFFS2 (int[]){-1,5,5,-1}
923#define N3 0
924#define SHIFT3 1
925#define COEFFS3 NULL
926#define N4 0
927#define SHIFT4 1
928#define COEFFS4 NULL
929#endif
aa25a462
RFI
930static void horizontal_decomposeX(DWTELEM *b, int width){
931 DWTELEM temp[width];
791e7b83
MN
932 const int width2= width>>1;
933 const int w2= (width+1)>>1;
62ab0b78 934 int x;
791e7b83
MN
935
936 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
937 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
938 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
939 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
940
941 for(x=0; x<width2; x++){
942 temp[x ]= b[2*x ];
943 temp[x+w2]= b[2*x + 1];
944 }
945 if(width&1)
946 temp[x ]= b[2*x ];
947 memcpy(b, temp, width*sizeof(int));
948}
949
aa25a462
RFI
950static void horizontal_composeX(DWTELEM *b, int width){
951 DWTELEM temp[width];
791e7b83 952 const int width2= width>>1;
62ab0b78 953 int x;
791e7b83
MN
954 const int w2= (width+1)>>1;
955
956 memcpy(temp, b, width*sizeof(int));
957 for(x=0; x<width2; x++){
958 b[2*x ]= temp[x ];
959 b[2*x + 1]= temp[x+w2];
960 }
961 if(width&1)
962 b[2*x ]= temp[x ];
963
964 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
965 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
966 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
967 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
968}
969
aa25a462 970static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
791e7b83
MN
971 int x, y;
972
973 for(y=0; y<height; y++){
974 for(x=0; x<width; x++){
975 buffer[y*stride + x] *= SCALEX;
976 }
977 }
978
979 for(y=0; y<height; y++){
980 horizontal_decomposeX(buffer + y*stride, width);
981 }
982
983 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
984 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
985 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
986 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
987}
988
aa25a462 989static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
791e7b83
MN
990 int x, y;
991
992 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
993 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
994 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
995 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
996
997 for(y=0; y<height; y++){
998 horizontal_composeX(buffer + y*stride, width);
999 }
1000
1001 for(y=0; y<height; y++){
1002 for(x=0; x<width; x++){
1003 buffer[y*stride + x] /= SCALEX;
1004 }
1005 }
1006}
1007
aa25a462
RFI
1008static void horizontal_decompose53i(DWTELEM *b, int width){
1009 DWTELEM temp[width];
791e7b83 1010 const int width2= width>>1;
62ab0b78 1011 int x;
791e7b83
MN
1012 const int w2= (width+1)>>1;
1013
1014 for(x=0; x<width2; x++){
1015 temp[x ]= b[2*x ];
1016 temp[x+w2]= b[2*x + 1];
1017 }
1018 if(width&1)
1019 temp[x ]= b[2*x ];
1020#if 0
62ab0b78
AJ
1021 {
1022 int A1,A2,A3,A4;
791e7b83
MN
1023 A2= temp[1 ];
1024 A4= temp[0 ];
1025 A1= temp[0+width2];
1026 A1 -= (A2 + A4)>>1;
1027 A4 += (A1 + 1)>>1;
1028 b[0+width2] = A1;
1029 b[0 ] = A4;
1030 for(x=1; x+1<width2; x+=2){
1031 A3= temp[x+width2];
1032 A4= temp[x+1 ];
1033 A3 -= (A2 + A4)>>1;
1034 A2 += (A1 + A3 + 2)>>2;
1035 b[x+width2] = A3;
1036 b[x ] = A2;
1037
1038 A1= temp[x+1+width2];
1039 A2= temp[x+2 ];
1040 A1 -= (A2 + A4)>>1;
1041 A4 += (A1 + A3 + 2)>>2;
1042 b[x+1+width2] = A1;
1043 b[x+1 ] = A4;
1044 }
1045 A3= temp[width-1];
1046 A3 -= A2;
1047 A2 += (A1 + A3 + 2)>>2;
1048 b[width -1] = A3;
1049 b[width2-1] = A2;
62ab0b78 1050 }
791e7b83
MN
1051#else
1052 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1053 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
1054#endif
1055}
1056
aa25a462 1057static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1058 int i;
1059
1060 for(i=0; i<width; i++){
1061 b1[i] -= (b0[i] + b2[i])>>1;
1062 }
1063}
1064
aa25a462 1065static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1066 int i;
1067
1068 for(i=0; i<width; i++){
1069 b1[i] += (b0[i] + b2[i] + 2)>>2;
1070 }
1071}
1072
aa25a462 1073static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 1074 int y;
791e7b83
MN
1075 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1076 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
1077
1078 for(y=-2; y<height; y+=2){
1079 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1080 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1081
1082{START_TIMER
1083 if(b1 <= b3) horizontal_decompose53i(b2, width);
1084 if(y+2 < height) horizontal_decompose53i(b3, width);
1085STOP_TIMER("horizontal_decompose53i")}
1086
1087{START_TIMER
1088 if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
1089 if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
1090STOP_TIMER("vertical_decompose53i*")}
1091
1092 b0=b2;
1093 b1=b3;
1094 }
1095}
1096
f5a71928 1097#define liftS lift
791e7b83
MN
1098#define lift5 lift
1099#if 1
1100#define W_AM 3
1101#define W_AO 0
1102#define W_AS 1
1103
f5a71928 1104#undef liftS
791e7b83
MN
1105#define W_BM 1
1106#define W_BO 8
1107#define W_BS 4
1108
f5a71928
MN
1109#define W_CM 1
1110#define W_CO 0
1111#define W_CS 0
791e7b83 1112
f5a71928
MN
1113#define W_DM 3
1114#define W_DO 4
1115#define W_DS 3
791e7b83
MN
1116#elif 0
1117#define W_AM 55
1118#define W_AO 16
1119#define W_AS 5
1120
1121#define W_BM 3
1122#define W_BO 32
1123#define W_BS 6
1124
1125#define W_CM 127
1126#define W_CO 64
1127#define W_CS 7
1128
1129#define W_DM 7
1130#define W_DO 8
1131#define W_DS 4
1132#elif 0
1133#define W_AM 97
1134#define W_AO 32
1135#define W_AS 6
1136
1137#define W_BM 63
1138#define W_BO 512
1139#define W_BS 10
1140
1141#define W_CM 13
1142#define W_CO 8
1143#define W_CS 4
1144
1145#define W_DM 15
1146#define W_DO 16
1147#define W_DS 5
1148
1149#else
1150
1151#define W_AM 203
1152#define W_AO 64
1153#define W_AS 7
1154
1155#define W_BM 217
1156#define W_BO 2048
1157#define W_BS 12
1158
1159#define W_CM 113
1160#define W_CO 64
1161#define W_CS 7
1162
1163#define W_DM 227
1164#define W_DO 128
1165#define W_DS 9
1166#endif
aa25a462
RFI
1167static void horizontal_decompose97i(DWTELEM *b, int width){
1168 DWTELEM temp[width];
791e7b83
MN
1169 const int w2= (width+1)>>1;
1170
1171 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
f5a71928 1172 liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
791e7b83
MN
1173 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1174 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
1175}
1176
1177
aa25a462 1178static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1179 int i;
1180
1181 for(i=0; i<width; i++){
1182 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1183 }
1184}
1185
aa25a462 1186static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1187 int i;
1188
1189 for(i=0; i<width; i++){
1190#ifdef lift5
1191 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1192#else
1193 int r= 3*(b0[i] + b2[i]);
1194 r+= r>>4;
1195 r+= r>>8;
1196 b1[i] += (r+W_CO)>>W_CS;
1197#endif
1198 }
1199}
1200
aa25a462 1201static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1202 int i;
1203
1204 for(i=0; i<width; i++){
f5a71928 1205#ifdef liftS
791e7b83 1206 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
f5a71928
MN
1207#else
1208 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
1209#endif
791e7b83
MN
1210 }
1211}
1212
aa25a462 1213static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1214 int i;
1215
1216 for(i=0; i<width; i++){
1217 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1218 }
1219}
1220
aa25a462 1221static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 1222 int y;
791e7b83
MN
1223 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1224 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1225 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1226 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1227
1228 for(y=-4; y<height; y+=2){
1229 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1230 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1231
1232{START_TIMER
1233 if(b3 <= b5) horizontal_decompose97i(b4, width);
1234 if(y+4 < height) horizontal_decompose97i(b5, width);
1235if(width>400){
1236STOP_TIMER("horizontal_decompose97i")
1237}}
1238
1239{START_TIMER
1240 if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1241 if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1242 if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1243 if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1244
1245if(width>400){
1246STOP_TIMER("vertical_decompose97i")
1247}}
1248
1249 b0=b2;
1250 b1=b3;
1251 b2=b4;
1252 b3=b5;
1253 }
1254}
1255
aa25a462 1256void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
791e7b83
MN
1257 int level;
1258
46c281e8
MN
1259 for(level=0; level<decomposition_count; level++){
1260 switch(type){
791e7b83
MN
1261 case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1262 case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1263 case 2: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
1264 }
1265 }
1266}
1267
aa25a462
RFI
1268static void horizontal_compose53i(DWTELEM *b, int width){
1269 DWTELEM temp[width];
791e7b83
MN
1270 const int width2= width>>1;
1271 const int w2= (width+1)>>1;
62ab0b78 1272 int x;
791e7b83
MN
1273
1274#if 0
62ab0b78 1275 int A1,A2,A3,A4;
791e7b83
MN
1276 A2= temp[1 ];
1277 A4= temp[0 ];
1278 A1= temp[0+width2];
1279 A1 -= (A2 + A4)>>1;
1280 A4 += (A1 + 1)>>1;
1281 b[0+width2] = A1;
1282 b[0 ] = A4;
1283 for(x=1; x+1<width2; x+=2){
1284 A3= temp[x+width2];
1285 A4= temp[x+1 ];
1286 A3 -= (A2 + A4)>>1;
1287 A2 += (A1 + A3 + 2)>>2;
1288 b[x+width2] = A3;
1289 b[x ] = A2;
1290
1291 A1= temp[x+1+width2];
1292 A2= temp[x+2 ];
1293 A1 -= (A2 + A4)>>1;
1294 A4 += (A1 + A3 + 2)>>2;
1295 b[x+1+width2] = A1;
1296 b[x+1 ] = A4;
1297 }
1298 A3= temp[width-1];
1299 A3 -= A2;
1300 A2 += (A1 + A3 + 2)>>2;
1301 b[width -1] = A3;
1302 b[width2-1] = A2;
1303#else
1304 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1305 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1306#endif
1307 for(x=0; x<width2; x++){
1308 b[2*x ]= temp[x ];
1309 b[2*x + 1]= temp[x+w2];
1310 }
1311 if(width&1)
1312 b[2*x ]= temp[x ];
1313}
1314
aa25a462 1315static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1316 int i;
1317
1318 for(i=0; i<width; i++){
1319 b1[i] += (b0[i] + b2[i])>>1;
1320 }
1321}
1322
aa25a462 1323static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1324 int i;
1325
1326 for(i=0; i<width; i++){
1327 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1328 }
1329}
1330
a0d1931c
Y
1331static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1332 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1333 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1334 cs->y = -1;
1335}
1336
f9e6ebf7
LM
1337static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1338 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1339 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1340 cs->y = -1;
1341}
1342
a0d1931c
Y
1343static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1344 int y= cs->y;
1345 int mirror0 = mirror(y-1, height-1);
1346 int mirror1 = mirror(y , height-1);
1347 int mirror2 = mirror(y+1, height-1);
1348 int mirror3 = mirror(y+2, height-1);
1349
1350 DWTELEM *b0= cs->b0;
1351 DWTELEM *b1= cs->b1;
1352 DWTELEM *b2= slice_buffer_get_line(sb, mirror2 * stride_line);
1353 DWTELEM *b3= slice_buffer_get_line(sb, mirror3 * stride_line);
1354
1355{START_TIMER
1356 if(mirror1 <= mirror3) vertical_compose53iL0(b1, b2, b3, width);
1357 if(mirror0 <= mirror2) vertical_compose53iH0(b0, b1, b2, width);
1358STOP_TIMER("vertical_compose53i*")}
1359
1360{START_TIMER
1361 if(y-1 >= 0) horizontal_compose53i(b0, width);
1362 if(mirror0 <= mirror2) horizontal_compose53i(b1, width);
1363STOP_TIMER("horizontal_compose53i")}
1364
1365 cs->b0 = b2;
1366 cs->b1 = b3;
1367 cs->y += 2;
1368}
1369
f9e6ebf7
LM
1370static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1371 int y= cs->y;
1372 DWTELEM *b0= cs->b0;
1373 DWTELEM *b1= cs->b1;
1374 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1375 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
791e7b83
MN
1376
1377{START_TIMER
1378 if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1379 if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1380STOP_TIMER("vertical_compose53i*")}
1381
1382{START_TIMER
1383 if(y-1 >= 0) horizontal_compose53i(b0, width);
1384 if(b0 <= b2) horizontal_compose53i(b1, width);
1385STOP_TIMER("horizontal_compose53i")}
1386
f9e6ebf7
LM
1387 cs->b0 = b2;
1388 cs->b1 = b3;
1389 cs->y += 2;
1390}
1391
1392static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1393 dwt_compose_t cs;
1394 spatial_compose53i_init(&cs, buffer, height, stride);
1395 while(cs.y <= height)
1396 spatial_compose53i_dy(&cs, buffer, width, height, stride);
791e7b83
MN
1397}
1398
1399
aa25a462
RFI
1400static void horizontal_compose97i(DWTELEM *b, int width){
1401 DWTELEM temp[width];
791e7b83
MN
1402 const int w2= (width+1)>>1;
1403
1404 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1405 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
f5a71928 1406 liftS(b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
791e7b83
MN
1407 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1408}
1409
aa25a462 1410static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1411 int i;
1412
1413 for(i=0; i<width; i++){
1414 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1415 }
1416}
1417
aa25a462 1418static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1419 int i;
1420
1421 for(i=0; i<width; i++){
1422#ifdef lift5
1423 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1424#else
1425 int r= 3*(b0[i] + b2[i]);
1426 r+= r>>4;
1427 r+= r>>8;
1428 b1[i] -= (r+W_CO)>>W_CS;
1429#endif
1430 }
1431}
1432
aa25a462 1433static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1434 int i;
1435
1436 for(i=0; i<width; i++){
f5a71928 1437#ifdef liftS
791e7b83 1438 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
f5a71928
MN
1439#else
1440 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1441#endif
791e7b83
MN
1442 }
1443}
1444
aa25a462 1445static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1446 int i;
1447
1448 for(i=0; i<width; i++){
1449 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1450 }
1451}
1452
565a45ac
MN
1453static void vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1454 int i;
1455
1456 for(i=0; i<width; i++){
62ab0b78 1457#ifndef lift5
565a45ac 1458 int r;
62ab0b78 1459#endif
565a45ac
MN
1460 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1461#ifdef lift5
1462 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1463#else
1464 r= 3*(b2[i] + b4[i]);
1465 r+= r>>4;
1466 r+= r>>8;
1467 b3[i] -= (r+W_CO)>>W_CS;
1468#endif
f5a71928 1469#ifdef liftS
565a45ac 1470 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
f5a71928
MN
1471#else
1472 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1473#endif
565a45ac
MN
1474 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1475 }
1476}
1477
a0d1931c
Y
1478static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1479 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1480 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1481 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1482 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1483 cs->y = -3;
1484}
1485
f9e6ebf7
LM
1486static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1487 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1488 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1489 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1490 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1491 cs->y = -3;
1492}
791e7b83 1493
a0d1931c
Y
1494static void spatial_compose97i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1495 int y = cs->y;
1496
1497 int mirror0 = mirror(y - 1, height - 1);
1498 int mirror1 = mirror(y + 0, height - 1);
1499 int mirror2 = mirror(y + 1, height - 1);
1500 int mirror3 = mirror(y + 2, height - 1);
1501 int mirror4 = mirror(y + 3, height - 1);
1502 int mirror5 = mirror(y + 4, height - 1);
1503 DWTELEM *b0= cs->b0;
1504 DWTELEM *b1= cs->b1;
1505 DWTELEM *b2= cs->b2;
1506 DWTELEM *b3= cs->b3;
1507 DWTELEM *b4= slice_buffer_get_line(sb, mirror4 * stride_line);
1508 DWTELEM *b5= slice_buffer_get_line(sb, mirror5 * stride_line);
1509
a0d1931c 1510{START_TIMER
565a45ac
MN
1511 if(y>0 && y+4<height){
1512 vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1513 }else{
1514 if(mirror3 <= mirror5) vertical_compose97iL1(b3, b4, b5, width);
1515 if(mirror2 <= mirror4) vertical_compose97iH1(b2, b3, b4, width);
1516 if(mirror1 <= mirror3) vertical_compose97iL0(b1, b2, b3, width);
1517 if(mirror0 <= mirror2) vertical_compose97iH0(b0, b1, b2, width);
1518 }
a0d1931c
Y
1519if(width>400){
1520STOP_TIMER("vertical_compose97i")}}
a0d1931c
Y
1521
1522{START_TIMER
1523 if(y-1>= 0) horizontal_compose97i(b0, width);
1524 if(mirror0 <= mirror2) horizontal_compose97i(b1, width);
1525if(width>400 && mirror0 <= mirror2){
1526STOP_TIMER("horizontal_compose97i")}}
1527
1528 cs->b0=b2;
1529 cs->b1=b3;
1530 cs->b2=b4;
1531 cs->b3=b5;
1532 cs->y += 2;
1533}
1534
f9e6ebf7
LM
1535static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1536 int y = cs->y;
1537 DWTELEM *b0= cs->b0;
1538 DWTELEM *b1= cs->b1;
1539 DWTELEM *b2= cs->b2;
1540 DWTELEM *b3= cs->b3;
1541 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1542 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
791e7b83
MN
1543
1544 if(stride == width && y+4 < height && 0){
1545 int x;
1546 for(x=0; x<width/2; x++)
1547 b5[x] += 64*2;
1548 for(; x<width; x++)
1549 b5[x] += 169*2;
1550 }
1551
1552{START_TIMER
1553 if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1554 if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1555 if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1556 if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1557if(width>400){
1558STOP_TIMER("vertical_compose97i")}}
1559
1560{START_TIMER
1561 if(y-1>= 0) horizontal_compose97i(b0, width);
1562 if(b0 <= b2) horizontal_compose97i(b1, width);
1563if(width>400 && b0 <= b2){
1564STOP_TIMER("horizontal_compose97i")}}
f9e6ebf7
LM
1565
1566 cs->b0=b2;
1567 cs->b1=b3;
1568 cs->b2=b4;
1569 cs->b3=b5;
1570 cs->y += 2;
1571}
1572
1573static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1574 dwt_compose_t cs;
1575 spatial_compose97i_init(&cs, buffer, height, stride);
1576 while(cs.y <= height)
1577 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1578}
1579
a0d1931c
Y
1580void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1581 int level;
1582 for(level=decomposition_count-1; level>=0; level--){
1583 switch(type){
1584 case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1585 case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1586 /* not slicified yet */
1587 case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1588 av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
1589 }
1590 }
1591}
1592
f9e6ebf7
LM
1593void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1594 int level;
1595 for(level=decomposition_count-1; level>=0; level--){
1596 switch(type){
1597 case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1598 case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1599 /* not slicified yet */
1600 case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
1601 }
791e7b83
MN
1602 }
1603}
1604
f9e6ebf7
LM
1605void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1606 const int support = type==1 ? 3 : 5;
791e7b83 1607 int level;
f9e6ebf7 1608 if(type==2) return;
791e7b83 1609
46c281e8 1610 for(level=decomposition_count-1; level>=0; level--){
f9e6ebf7
LM
1611 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1612 switch(type){
1613 case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1614 break;
1615 case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1616 break;
1617 case 2: break;
1618 }
791e7b83
MN
1619 }
1620 }
1621}
1622
a0d1931c
Y
1623void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1624 const int support = type==1 ? 3 : 5;
1625 int level;
1626 if(type==2) return;
1627
1628 for(level=decomposition_count-1; level>=0; level--){
1629 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1630 switch(type){
1631 case 0: spatial_compose97i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1632 break;
1633 case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1634 break;
1635 case 2: break;
1636 }
1637 }
1638 }
1639}
1640
f9e6ebf7
LM
1641void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1642 if(type==2){
1643 int level;
1644 for(level=decomposition_count-1; level>=0; level--)
1645 spatial_composeX (buffer, width>>level, height>>level, stride<<level);
1646 }else{
1647 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1648 int y;
1649 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1650 for(y=0; y<height; y+=4)
1651 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1652 }
1653}
1654
0ecca7a4 1655static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
4f4e9633
MN
1656 const int w= b->width;
1657 const int h= b->height;
1658 int x, y;
1659
791e7b83 1660 if(1){
791e7b83 1661 int run=0;
a8d73e56 1662 int runs[w*h];
791e7b83 1663 int run_index=0;
b44985ba 1664 int max_index;
791e7b83 1665
791e7b83
MN
1666 for(y=0; y<h; y++){
1667 for(x=0; x<w; x++){
78486403 1668 int v, p=0;
6b2f6646 1669 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1670 v= src[x + y*stride];
791e7b83
MN
1671
1672 if(y){
a8d73e56 1673 t= src[x + (y-1)*stride];
791e7b83 1674 if(x){
a8d73e56 1675 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1676 }
1677 if(x + 1 < w){
a8d73e56 1678 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1679 }
1680 }
1681 if(x){
a8d73e56 1682 l= src[x - 1 + y*stride];
6b2f6646
MN
1683 /*if(x > 1){
1684 if(orientation==1) ll= src[y + (x-2)*stride];
1685 else ll= src[x - 2 + y*stride];
791e7b83
MN
1686 }*/
1687 }
78486403 1688 if(parent){
a8d73e56
MN
1689 int px= x>>1;
1690 int py= y>>1;
78486403
MN
1691 if(px<b->parent->width && py<b->parent->height)
1692 p= parent[px + py*2*stride];
1693 }
1694 if(!(/*ll|*/l|lt|t|rt|p)){
791e7b83
MN
1695 if(v){
1696 runs[run_index++]= run;
1697 run=0;
1698 }else{
1699 run++;
1700 }
1701 }
1702 }
1703 }
b44985ba 1704 max_index= run_index;
791e7b83
MN
1705 runs[run_index++]= run;
1706 run_index=0;
1707 run= runs[run_index++];
1708
b44985ba
MN
1709 put_symbol2(&s->c, b->state[30], max_index, 0);
1710 if(run_index <= max_index)
1711 put_symbol2(&s->c, b->state[1], run, 3);
791e7b83
MN
1712
1713 for(y=0; y<h; y++){
d06c75a8 1714 if(s->c.bytestream_end - s->c.bytestream < w*40){
0ecca7a4
MN
1715 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1716 return -1;
1717 }
791e7b83 1718 for(x=0; x<w; x++){
78486403 1719 int v, p=0;
6b2f6646 1720 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1721 v= src[x + y*stride];
791e7b83
MN
1722
1723 if(y){
a8d73e56 1724 t= src[x + (y-1)*stride];
791e7b83 1725 if(x){
a8d73e56 1726 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1727 }
1728 if(x + 1 < w){
a8d73e56 1729 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1730 }
1731 }
1732 if(x){
a8d73e56 1733 l= src[x - 1 + y*stride];
6b2f6646
MN
1734 /*if(x > 1){
1735 if(orientation==1) ll= src[y + (x-2)*stride];
1736 else ll= src[x - 2 + y*stride];
791e7b83
MN
1737 }*/
1738 }
78486403 1739 if(parent){
a8d73e56
MN
1740 int px= x>>1;
1741 int py= y>>1;
78486403
MN
1742 if(px<b->parent->width && py<b->parent->height)
1743 p= parent[px + py*2*stride];
1744 }
1745 if(/*ll|*/l|lt|t|rt|p){
1746 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
6b2f6646 1747
28869757 1748 put_rac(&s->c, &b->state[0][context], !!v);
791e7b83
MN
1749 }else{
1750 if(!run){
1751 run= runs[run_index++];
4f4e9633 1752
b44985ba
MN
1753 if(run_index <= max_index)
1754 put_symbol2(&s->c, b->state[1], run, 3);
791e7b83
MN
1755 assert(v);
1756 }else{
1757 run--;
1758 assert(!v);
1759 }
1760 }
1761 if(v){
78486403 1762 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
538a3841
MN
1763 int l2= 2*ABS(l) + (l<0);
1764 int t2= 2*ABS(t) + (t<0);
6b2f6646 1765
0635cbfc 1766 put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
538a3841 1767 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
791e7b83
MN
1768 }
1769 }
1770 }
791e7b83 1771 }
0ecca7a4 1772 return 0;
791e7b83
MN
1773}
1774
0ecca7a4 1775static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
4f4e9633
MN
1776// encode_subband_qtree(s, b, src, parent, stride, orientation);
1777// encode_subband_z0run(s, b, src, parent, stride, orientation);
0ecca7a4 1778 return encode_subband_c0run(s, b, src, parent, stride, orientation);
4f4e9633
MN
1779// encode_subband_dzr(s, b, src, parent, stride, orientation);
1780}
1781
a0d1931c 1782static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
791e7b83
MN
1783 const int w= b->width;
1784 const int h= b->height;
1785 int x,y;
ea7d9cd4 1786
791e7b83 1787 if(1){
b44985ba 1788 int run, runs;
cbb1d2b1
MN
1789 x_and_coeff *xc= b->x_coeff;
1790 x_and_coeff *prev_xc= NULL;
1791 x_and_coeff *prev2_xc= xc;
1792 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1793 x_and_coeff *prev_parent_xc= parent_xc;
791e7b83 1794
b44985ba
MN
1795 runs= get_symbol2(&s->c, b->state[30], 0);
1796 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1797 else run= INT_MAX;
1798
791e7b83 1799 for(y=0; y<h; y++){
0cea8a03
MN
1800 int v=0;
1801 int lt=0, t=0, rt=0;
1802
cbb1d2b1
MN
1803 if(y && prev_xc->x == 0){
1804 rt= prev_xc->coeff;
0cea8a03 1805 }
791e7b83 1806 for(x=0; x<w; x++){
0cea8a03
MN
1807 int p=0;
1808 const int l= v;
1809
1810 lt= t; t= rt;
791e7b83 1811
ff765159 1812 if(y){
cbb1d2b1
MN
1813 if(prev_xc->x <= x)
1814 prev_xc++;
1815 if(prev_xc->x == x + 1)
1816 rt= prev_xc->coeff;
ff765159
MN
1817 else
1818 rt=0;
1819 }
cbb1d2b1
MN
1820 if(parent_xc){
1821 if(x>>1 > parent_xc->x){
1822 parent_xc++;
7b49c309 1823 }
cbb1d2b1
MN
1824 if(x>>1 == parent_xc->x){
1825 p= parent_xc->coeff;
ff765159 1826 }
78486403
MN
1827 }
1828 if(/*ll|*/l|lt|t|rt|p){
538a3841 1829 int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
6b2f6646 1830
28869757 1831 v=get_rac(&s->c, &b->state[0][context]);
3c096ac7
MN
1832 if(v){
1833 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1834 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1835
cbb1d2b1
MN
1836 xc->x=x;
1837 (xc++)->coeff= v;
3c096ac7 1838 }
791e7b83
MN
1839 }else{
1840 if(!run){
b44985ba
MN
1841 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1842 else run= INT_MAX;
3c096ac7
MN
1843 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1844 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1845
cbb1d2b1
MN
1846 xc->x=x;
1847 (xc++)->coeff= v;
791e7b83 1848 }else{
99cd59e5 1849 int max_run;
791e7b83
MN
1850 run--;
1851 v=0;
3c1adccd 1852
cbb1d2b1 1853 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
99cd59e5 1854 else max_run= FFMIN(run, w-x-1);
cbb1d2b1
MN
1855 if(parent_xc)
1856 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
99cd59e5
MN
1857 x+= max_run;
1858 run-= max_run;
791e7b83
MN
1859 }
1860 }
7b49c309 1861 }
cbb1d2b1
MN
1862 (xc++)->x= w+1; //end marker
1863 prev_xc= prev2_xc;
1864 prev2_xc= xc;
7b49c309 1865
cbb1d2b1 1866 if(parent_xc){
7b49c309 1867 if(y&1){
cbb1d2b1
MN
1868 while(parent_xc->x != parent->width+1)
1869 parent_xc++;
1870 parent_xc++;
1871 prev_parent_xc= parent_xc;
7b49c309 1872 }else{
cbb1d2b1 1873 parent_xc= prev_parent_xc;
791e7b83
MN
1874 }
1875 }
1876 }
a0d1931c 1877
cbb1d2b1 1878 (xc++)->x= w+1; //end marker
a0d1931c
Y
1879 }
1880}
1881
1882static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1883 const int w= b->width;
62ab0b78 1884 int y;
c97de57c
MN
1885 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
1886 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
a0d1931c
Y
1887 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1888 int new_index = 0;
1889
1890 START_TIMER
1891
1892 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1893 qadd= 0;
1894 qmul= 1<<QEXPSHIFT;
1895 }
1896
1897 /* If we are on the second or later slice, restore our index. */
1898 if (start_y != 0)
1899 new_index = save_state[0];
1900
791e7b83 1901
a0d1931c
Y
1902 for(y=start_y; y<h; y++){
1903 int x = 0;
1904 int v;
1905 DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1906 memset(line, 0, b->width*sizeof(DWTELEM));
1907 v = b->x_coeff[new_index].coeff;
1908 x = b->x_coeff[new_index++].x;
1909 while(x < w)
1910 {
538a3841
MN
1911 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1912 register int u= -(v&1);
1913 line[x] = (t^u) - u;
1914
a0d1931c
Y
1915 v = b->x_coeff[new_index].coeff;
1916 x = b->x_coeff[new_index++].x;
1917 }
791e7b83 1918 }
a0d1931c
Y
1919 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1920 STOP_TIMER("decode_subband")
1921 }
1922
1923 /* Save our variables for the next slice. */
1924 save_state[0] = new_index;
1925
1926 return;
791e7b83
MN
1927}
1928
1929static void reset_contexts(SnowContext *s){
1930 int plane_index, level, orientation;
1931
19aa028d 1932 for(plane_index=0; plane_index<3; plane_index++){
791e7b83
MN
1933 for(level=0; level<s->spatial_decomposition_count; level++){
1934 for(orientation=level ? 1:0; orientation<4; orientation++){
28869757 1935 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
791e7b83
MN
1936 }
1937 }
1938 }
28869757
MN
1939 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1940 memset(s->block_state, MID_STATE, sizeof(s->block_state));
155ec6ed
MN
1941}
1942
1943static int alloc_blocks(SnowContext *s){
1944 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1945 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1946
1947 s->b_width = w;
1948 s->b_height= h;
1949
1950 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1951 return 0;
1952}
1953
28869757
MN
1954static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1955 uint8_t *bytestream= d->bytestream;
1956 uint8_t *bytestream_start= d->bytestream_start;
155ec6ed 1957 *d= *s;
28869757
MN
1958 d->bytestream= bytestream;
1959 d->bytestream_start= bytestream_start;
155ec6ed
MN
1960}
1961
1962//near copy & paste from dsputil, FIXME
1963static int pix_sum(uint8_t * pix, int line_size, int w)
1964{
1965 int s, i, j;
1966
1967 s = 0;
1968 for (i = 0; i < w; i++) {
1969 for (j = 0; j < w; j++) {
1970 s += pix[0];
1971 pix ++;
1972 }
1973 pix += line_size - w;
1974 }
1975 return s;
1976}
1977
1978//near copy & paste from dsputil, FIXME
1979static int pix_norm1(uint8_t * pix, int line_size, int w)
1980{
1981 int s, i, j;
1982 uint32_t *sq = squareTbl + 256;
1983
1984 s = 0;
1985 for (i = 0; i < w; i++) {
1986 for (j = 0; j < w; j ++) {
1987 s += sq[pix[0]];
1988 pix ++;
1989 }
1990 pix += line_size - w;
1991 }
1992 return s;
1993}
1994
1995static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1996 const int w= s->b_width << s->block_max_depth;
1997 const int rem_depth= s->block_max_depth - level;
1998 const int index= (x + y*w) << rem_depth;
1999 const int block_w= 1<<rem_depth;
2000 BlockNode block;
2001 int i,j;
2002
2003 block.color[0]= l;
2004 block.color[1]= cb;
2005 block.color[2]= cr;
2006 block.mx= mx;
2007 block.my= my;
2008 block.type= type;
2009 block.level= level;
2010
2011 for(j=0; j<block_w; j++){
2012 for(i=0; i<block_w; i++){
2013 s->block[index + i + j*w]= block;
2014 }
2015 }
2016}
2017
2018static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
2019 const int offset[3]= {
2020 y*c-> stride + x,
2021 ((y*c->uvstride + x)>>1),
2022 ((y*c->uvstride + x)>>1),
2023 };
2024 int i;
2025 for(i=0; i<3; i++){
2026 c->src[0][i]= src [i];
2027 c->ref[0][i]= ref [i] + offset[i];
2028 }
2029 assert(!ref_index);
2030}
2031
2032//FIXME copy&paste
2033#define P_LEFT P[1]
2034#define P_TOP P[2]
2035#define P_TOPRIGHT P[3]
2036#define P_MEDIAN P[4]
2037#define P_MV1 P[9]
2038#define FLAG_QPEL 1 //must be 1
2039
2040static int encode_q_branch(SnowContext *s, int level, int x, int y){
2041 uint8_t p_buffer[1024];
2042 uint8_t i_buffer[1024];
2043 uint8_t p_state[sizeof(s->block_state)];
2044 uint8_t i_state[sizeof(s->block_state)];
28869757
MN
2045 RangeCoder pc, ic;
2046 uint8_t *pbbak= s->c.bytestream;
2047 uint8_t *pbbak_start= s->c.bytestream_start;
155ec6ed
MN
2048 int score, score2, iscore, i_len, p_len, block_s, sum;
2049 const int w= s->b_width << s->block_max_depth;
2050 const int h= s->b_height << s->block_max_depth;
2051 const int rem_depth= s->block_max_depth - level;
2052 const int index= (x + y*w) << rem_depth;
2053 const int block_w= 1<<(LOG2_MB_SIZE - level);
2054 static BlockNode null_block= { //FIXME add border maybe
2055 .color= {128,128,128},
2056 .mx= 0,
2057 .my= 0,
2058 .type= 0,
2059 .level= 0,
2060 };
2061 int trx= (x+1)<<rem_depth;
2062 int try= (y+1)<<rem_depth;
2063 BlockNode *left = x ? &s->block[index-1] : &null_block;
2064 BlockNode *top = y ? &s->block[index-w] : &null_block;
2065 BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2066 BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2067 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2068 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2069 int pl = left->color[0];
2070 int pcb= left->color[1];
2071 int pcr= left->color[2];
2072 int pmx= mid_pred(left->mx, top->mx, tr->mx);
2073 int pmy= mid_pred(left->my, top->my, tr->my);
2074 int mx=0, my=0;
2075 int l,cr,cb, i;
2076 const int stride= s->current_picture.linesize[0];
2077 const int uvstride= s->current_picture.linesize[1];
2078 const int instride= s->input_picture.linesize[0];
2079 const int uvinstride= s->input_picture.linesize[1];
2080 uint8_t *new_l = s->input_picture.data[0] + (x + y* instride)*block_w;
2081 uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2;
2082 uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2;
2083 uint8_t current_mb[3][stride*block_w];
2084 uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]};
2085 int P[10][2];
2086 int16_t last_mv[3][2];
2087 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2088 const int shift= 1+qpel;
2089 MotionEstContext *c= &s->m.me;
2090 int mx_context= av_log2(2*ABS(left->mx - top->mx));
2091 int my_context= av_log2(2*ABS(left->my - top->my));
2092 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2093
2094 assert(sizeof(s->block_state) >= 256);
2095 if(s->keyframe){
2096 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2097 return 0;
2098 }
2099
2100 //FIXME optimize
2101 for(i=0; i<block_w; i++)
2102 memcpy(&current_mb[0][0] + stride*i, new_l + instride*i, block_w);
2103 for(i=0; i<block_w>>1; i++)
2104 memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1);
2105 for(i=0; i<block_w>>1; i++)
2106 memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1);
2107
2108// clip predictors / edge ?
2109
2110 P_LEFT[0]= left->mx;
2111 P_LEFT[1]= left->my;
2112 P_TOP [0]= top->mx;
2113 P_TOP [1]= top->my;
2114 P_TOPRIGHT[0]= tr->mx;
2115 P_TOPRIGHT[1]= tr->my;
2116
2117 last_mv[0][0]= s->block[index].mx;
2118 last_mv[0][1]= s->block[index].my;
2119 last_mv[1][0]= right->mx;
2120 last_mv[1][1]= right->my;
2121 last_mv[2][0]= bottom->mx;
2122 last_mv[2][1]= bottom->my;
2123
2124 s->m.mb_stride=2;
2125 s->m.mb_x=
2126 s->m.mb_y= 0;
2127 s->m.me.skip= 0;
2128
2129 init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
2130
2131 assert(s->m.me. stride == stride);
2132 assert(s->m.me.uvstride == uvstride);
2133
2134 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2135 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2136 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2137 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2138
ff158dc9
MN
2139 c->xmin = - x*block_w - 16+2;
2140 c->ymin = - y*block_w - 16+2;
2141 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2142 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
155ec6ed
MN
2143
2144 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
2145 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
2146 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2147 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2148 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2149 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2150 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2151
2152 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2153 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2154
2155 if (!y) {
2156 c->pred_x= P_LEFT[0];
2157 c->pred_y= P_LEFT[1];
2158 } else {
2159 c->pred_x = P_MEDIAN[0];
2160 c->pred_y = P_MEDIAN[1];
2161 }
2162
2163 score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv,
2164 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2165
2166 assert(mx >= c->xmin);
2167 assert(mx <= c->xmax);
2168 assert(my >= c->ymin);
2169 assert(my <= c->ymax);
2170
2171 score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2172 score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2173 //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
2174
2175 // subpel search
2176 pc= s->c;
28869757
MN
2177 pc.bytestream_start=
2178 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
155ec6ed
MN
2179 memcpy(p_state, s->block_state, sizeof(s->block_state));
2180
2181 if(level!=s->block_max_depth)
28869757
MN
2182 put_rac(&pc, &p_state[4 + s_context], 1);
2183 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
155ec6ed
MN
2184 put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
2185 put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
28869757
MN
2186 p_len= pc.bytestream - pc.bytestream_start;
2187 score += (s->lambda2*(p_len*8
2188 + (pc.outstanding_count - s->c.outstanding_count)*8
2189 + (-av_log2(pc.range) + av_log2(s->c.range))
2190 ))>>FF_LAMBDA_SHIFT;
155ec6ed
MN
2191
2192 block_s= block_w*block_w;
2193 sum = pix_sum(&current_mb[0][0], stride, block_w);
2194 l= (sum + block_s/2)/block_s;
2195 iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s;
2196
2197 block_s= block_w*block_w>>2;
2198 sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1);
2199 cb= (sum + block_s/2)/block_s;
2200// iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2201 sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1);
2202 cr= (sum + block_s/2)/block_s;
2203// iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2204
2205 ic= s->c;
28869757
MN
2206 ic.bytestream_start=
2207 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
155ec6ed
MN
2208 memcpy(i_state, s->block_state, sizeof(s->block_state));
2209 if(level!=s->block_max_depth)
28869757
MN
2210 put_rac(&ic, &i_state[4 + s_context], 1);
2211 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
155ec6ed
MN
2212 put_symbol(&ic, &i_state[32], l-pl , 1);
2213 put_symbol(&ic, &i_state[64], cb-pcb, 1);
2214 put_symbol(&ic, &i_state[96], cr-pcr, 1);
28869757
MN
2215 i_len= ic.bytestream - ic.bytestream_start;
2216 iscore += (s->lambda2*(i_len*8
2217 + (ic.outstanding_count - s->c.outstanding_count)*8
2218 + (-av_log2(ic.range) + av_log2(s->c.range))
2219 ))>>FF_LAMBDA_SHIFT;
155ec6ed
MN
2220
2221// assert(score==256*256*256*64-1);
2222 assert(iscore < 255*255*256 + s->lambda2*10);
2223 assert(iscore >= 0);
2224 assert(l>=0 && l<=255);
2225 assert(pl>=0 && pl<=255);
2226
2227 if(level==0){
2228 int varc= iscore >> 8;
2229 int vard= score >> 8;
2230 if (vard <= 64 || vard < varc)
2231 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2232 else
2233 c->scene_change_score+= s->m.qscale;
2234 }
2235
2236 if(level!=s->block_max_depth){
28869757 2237 put_rac(&s->c, &s->block_state[4 + s_context], 0);
155ec6ed
MN
2238 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2239 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2240 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2241 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2242 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2243
2244 if(score2 < score && score2 < iscore)
2245 return score2;
2246 }
2247
2248 if(iscore < score){
28869757 2249 memcpy(pbbak, i_buffer, i_len);
155ec6ed 2250 s->c= ic;
28869757
MN
2251 s->c.bytestream_start= pbbak_start;
2252 s->c.bytestream= pbbak + i_len;
155ec6ed
MN
2253 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
2254 memcpy(s->block_state, i_state, sizeof(s->block_state));
2255 return iscore;
2256 }else{
28869757 2257 memcpy(pbbak, p_buffer, p_len);
155ec6ed 2258 s->c= pc;
28869757
MN
2259 s->c.bytestream_start= pbbak_start;
2260 s->c.bytestream= pbbak + p_len;
155ec6ed
MN
2261 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
2262 memcpy(s->block_state, p_state, sizeof(s->block_state));
2263 return score;
2264 }
2265}
2266
2267static void decode_q_branch(SnowContext *s, int level, int x, int y){
2268 const int w= s->b_width << s->block_max_depth;
155ec6ed
MN
2269 const int rem_depth= s->block_max_depth - level;
2270 const int index= (x + y*w) << rem_depth;
2271 static BlockNode null_block= { //FIXME add border maybe
2272 .color= {128,128,128},
2273 .mx= 0,
2274 .my= 0,
2275 .type= 0,
2276 .level= 0,
2277 };
2278 int trx= (x+1)<<rem_depth;
155ec6ed
MN
2279 BlockNode *left = x ? &s->block[index-1] : &null_block;
2280 BlockNode *top = y ? &s->block[index-w] : &null_block;
2281 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2282 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2283 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2284
2285 if(s->keyframe){
2286 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
2287 return;
2288 }
2289
28869757 2290 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
155ec6ed
MN
2291 int type;
2292 int l = left->color[0];
2293 int cb= left->color[1];
2294 int cr= left->color[2];
2295 int mx= mid_pred(left->mx, top->mx, tr->mx);
2296 int my= mid_pred(left->my, top->my, tr->my);
2297 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
2298 int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
2299
28869757 2300 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
155ec6ed
MN
2301
2302 if(type){
2303 l += get_symbol(&s->c, &s->block_state[32], 1);
2304 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2305 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2306 }else{
2307 mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
2308 my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
2309 }
2310 set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
2311 }else{
2312 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2313 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2314 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2315 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2316 }
2317}
2318
2319static void encode_blocks(SnowContext *s){
2320 int x, y;
2321 int w= s->b_width;
2322 int h= s->b_height;
2323
2324 for(y=0; y<h; y++){
d06c75a8 2325 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
0ecca7a4
MN
2326 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2327 return;
2328 }
155ec6ed
MN
2329 for(x=0; x<w; x++){
2330 encode_q_branch(s, 0, x, y);
2331 }
2332 }
2333}
2334
2335static void decode_blocks(SnowContext *s){
2336 int x, y;
2337 int w= s->b_width;
2338 int h= s->b_height;
2339
2340 for(y=0; y<h; y++){
2341 for(x=0; x<w; x++){
2342 decode_q_branch(s, 0, x, y);
2343 }
2344 }
791e7b83
MN
2345}
2346
2347static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2348 int x, y;
3924dac4 2349START_TIMER
791e7b83
MN
2350 for(y=0; y < b_h+5; y++){
2351 for(x=0; x < b_w; x++){
3924dac4
MN
2352 int a0= src[x ];
2353 int a1= src[x + 1];
2354 int a2= src[x + 2];
2355 int a3= src[x + 3];
2356 int a4= src[x + 4];
2357 int a5= src[x + 5];
791e7b83
MN
2358// int am= 9*(a1+a2) - (a0+a3);
2359 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2360// int am= 18*(a2+a3) - 2*(a1+a4);
2361// int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2362// int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2363
2364// if(b_w==16) am= 8*(a1+a2);
2365
8c2515bb
Y
2366 if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
2367 else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2368
2369 /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
2370 if(am&(~255)) am= ~(am>>31);
2371
2372 tmp[x] = am;
791e7b83
MN
2373
2374/* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
2375 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
2376 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
2377 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2378 }
3924dac4
MN
2379 tmp += stride;
2380 src += stride;
791e7b83 2381 }
3924dac4
MN
2382 tmp -= (b_h+5)*stride;
2383
791e7b83
MN
2384 for(y=0; y < b_h; y++){
2385 for(x=0; x < b_w; x++){
3924dac4
MN
2386 int a0= tmp[x + 0*stride];
2387 int a1= tmp[x + 1*stride];
2388 int a2= tmp[x + 2*stride];
2389 int a3= tmp[x + 3*stride];
2390 int a4= tmp[x + 4*stride];
2391 int a5= tmp[x + 5*stride];
791e7b83
MN
2392 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2393// int am= 18*(a2+a3) - 2*(a1+a4);
2394/* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2395 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2396
2397// if(b_w==16) am= 8*(a1+a2);
2398
8c2515bb
Y
2399 if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
2400 else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
791e7b83 2401
8c2515bb
Y
2402 if(am&(~255)) am= ~(am>>31);
2403
2404 dst[x] = am;
791e7b83
MN
2405/* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
2406 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
2407 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
2408 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2409 }
3924dac4
MN
2410 dst += stride;
2411 tmp += stride;
791e7b83 2412 }
3924dac4 2413STOP_TIMER("mc_block")
791e7b83
MN
2414}
2415
791e7b83 2416#define mca(dx,dy,b_w)\
d92b5807 2417static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
791e7b83
MN
2418 uint8_t tmp[stride*(b_w+5)];\
2419 assert(h==b_w);\
2420 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2421}
2422
2423mca( 0, 0,16)
2424mca( 8, 0,16)
2425mca( 0, 8,16)
2426mca( 8, 8,16)
d92b5807
MN
2427mca( 0, 0,8)
2428mca( 8, 0,8)
2429mca( 0, 8,8)
2430mca( 8, 8,8)
791e7b83 2431
ff158dc9
MN
2432static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2433 if(block->type){
2434 int x, y;
2435 const int color= block->color[plane_index];
2436 for(y=0; y < b_h; y++){
2437 for(x=0; x < b_w; x++){
2438 dst[x + y*stride]= color;
2439 }
2440 }
2441 }else{
2442 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2443 int mx= block->mx*scale;
2444 int my= block->my*scale;
ec697587
MN
2445 const int dx= mx&15;
2446 const int dy= my&15;
ff158dc9
MN
2447 sx += (mx>>4) - 2;
2448 sy += (my>>4) - 2;
2449 src += sx + sy*stride;
2450 if( (unsigned)sx >= w - b_w - 4
2451 || (unsigned)sy >= h - b_h - 4){
2452 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2453 src= tmp + MB_SIZE;
2454 }
ec697587
MN
2455 if((dx&3) || (dy&3) || b_w!=b_h || (b_w!=4 && b_w!=8 && b_w!=16))
2456 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2457 else
2458 s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
ff158dc9
MN
2459 }
2460}
2461
2462static always_inline int same_block(BlockNode *a, BlockNode *b){
2463 return !((a->mx - b->mx) | (a->my - b->my) | a->type | b->type);
2464}
2465
2466//FIXME name clenup (b_w, block_w, b_width stuff)
a0d1931c
Y
2467static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2468 DWTELEM * dst = NULL;
2469 const int b_width = s->b_width << s->block_max_depth;
2470 const int b_height= s->b_height << s->block_max_depth;
2471 const int b_stride= b_width;
2472 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2473 BlockNode *rt= lt+1;
2474 BlockNode *lb= lt+b_stride;
2475 BlockNode *rb= lb+1;
2476 uint8_t *block[4];
cc884a35
MN
2477 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2478 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2479 uint8_t *ptmp;
a0d1931c
Y
2480 int x,y;
2481
2482 if(b_x<0){
2483 lt= rt;
2484 lb= rb;
2485 }else if(b_x + 1 >= b_width){
2486 rt= lt;
2487 rb= lb;
2488 }
2489 if(b_y<0){
2490 lt= lb;
2491 rt= rb;
2492 }else if(b_y + 1 >= b_height){
2493 lb= lt;
2494 rb= rt;
2495 }
2496
2497 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2498 obmc -= src_x;
2499 b_w += src_x;
2500 src_x=0;
2501 }else if(src_x + b_w > w){
2502 b_w = w - src_x;
2503 }
2504 if(src_y<0){
2505 obmc -= src_y*obmc_stride;
2506 b_h += src_y;
2507 src_y=0;
2508 }else if(src_y + b_h> h){
2509 b_h = h - src_y;
2510 }
2511
2512 if(b_w<=0 || b_h<=0) return;
2513
cc884a35 2514assert(src_stride > 2*MB_SIZE + 5);
a0d1931c
Y
2515// old_dst += src_x + src_y*dst_stride;
2516 dst8+= src_x + src_y*src_stride;
2517// src += src_x + src_y*src_stride;
2518
cc884a35
MN
2519 ptmp= tmp + 3*tmp_step;
2520 block[0]= ptmp;
2521 ptmp+=tmp_step;
a0d1931c
Y
2522 pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2523
2524 if(same_block(lt, rt)){
2525 block[1]= block[0];
2526 }else{
cc884a35
MN
2527 block[1]= ptmp;
2528 ptmp+=tmp_step;
a0d1931c
Y
2529 pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2530 }
2531
2532 if(same_block(lt, lb)){
2533 block[2]= block[0];
2534 }else if(same_block(rt, lb)){
2535 block[2]= block[1];
2536 }else{
cc884a35
MN
2537 block[2]= ptmp;
2538 ptmp+=tmp_step;
a0d1931c
Y
2539 pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2540 }
2541
2542 if(same_block(lt, rb) ){
2543 block[3]= block[0];
2544 }else if(same_block(rt, rb)){
2545 block[3]= block[1];
2546 }else if(same_block(lb, rb)){
2547 block[3]= block[2];
2548 }else{
cc884a35 2549 block[3]= ptmp;
a0d1931c
Y
2550 pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2551 }
2552#if 0
2553 for(y=0; y<b_h; y++){
2554 for(x=0; x<b_w; x++){
2555 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2556 if(add) dst[x + y*dst_stride] += v;
2557 else dst[x + y*dst_stride] -= v;
2558 }
2559 }
2560 for(y=0; y<b_h; y++){
2561 uint8_t *obmc2= obmc + (obmc_stride>>1);
2562 for(x=0; x<b_w; x++){
2563 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2564 if(add) dst[x + y*dst_stride] += v;
2565 else dst[x + y*dst_stride] -= v;
2566 }
2567 }
2568 for(y=0; y<b_h; y++){
2569 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2570 for(x=0; x<b_w; x++){
2571 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2572 if(add) dst[x + y*dst_stride] += v;
2573 else dst[x + y*dst_stride] -= v;
2574 }
2575 }
2576 for(y=0; y<b_h; y++){
2577 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2578 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2579 for(x=0; x<b_w; x++){
2580 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2581 if(add) dst[x + y*dst_stride] += v;
2582 else dst[x + y*dst_stride] -= v;
2583 }
2584 }
2585#else
2586{
2587
2588 START_TIMER
2589
a0d1931c
Y
2590 for(y=0; y<b_h; y++){
2591 //FIXME ugly missue of obmc_stride
2592 uint8_t *obmc1= obmc + y*obmc_stride;
2593 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2594 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2595 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2596 dst = slice_buffer_get_line(sb, src_y + y);
2597 for(x=0; x<b_w; x++){
2598 int v= obmc1[x] * block[3][x + y*src_stride]
2599 +obmc2[x] * block[2][x + y*src_stride]
2600 +obmc3[x] * block[1][x + y*src_stride]
2601 +obmc4[x] * block[0][x + y*src_stride];
2602
2603 v <<= 8 - LOG2_OBMC_MAX;
2604 if(FRAC_BITS != 8){
2605 v += 1<<(7 - FRAC_BITS);
2606 v >>= 8 - FRAC_BITS;
2607 }
2608 if(add){
2609// v += old_dst[x + y*dst_stride];
2610 v += dst[x + src_x];
2611 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2612 if(v&(~255)) v= ~(v>>31);
2613 dst8[x + y*src_stride] = v;
2614 }else{
2615// old_dst[x + y*dst_stride] -= v;
2616 dst[x + src_x] -= v;
2617 }
2618 }
2619 }
2620 STOP_TIMER("Inner add y block")
2621}
2622#endif
2623}
2624
2625//FIXME name clenup (b_w, block_w, b_width stuff)
715a97f0 2626static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
ff158dc9
MN
2627 const int b_width = s->b_width << s->block_max_depth;
2628 const int b_height= s->b_height << s->block_max_depth;
2629 const int b_stride= b_width;
2630 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2631 BlockNode *rt= lt+1;
2632 BlockNode *lb= lt+b_stride;
2633 BlockNode *rb= lb+1;
2634 uint8_t *block[4];
cc884a35
MN
2635 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2636 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2637 uint8_t *ptmp;
791e7b83
MN
2638 int x,y;
2639
ff158dc9
MN
2640 if(b_x<0){
2641 lt= rt;
2642 lb= rb;
2643 }else if(b_x + 1 >= b_width){
2644 rt= lt;
2645 rb= lb;
791e7b83 2646 }
ff158dc9
MN
2647 if(b_y<0){
2648 lt= lb;
2649 rt= rb;
2650 }else if(b_y + 1 >= b_height){
2651 lb= lt;
2652 rb= rt;
2653 }
2654
2655 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2656 obmc -= src_x;
2657 b_w += src_x;
2658 src_x=0;
2659 }else if(src_x + b_w > w){
2660 b_w = w - src_x;
2661 }
2662 if(src_y<0){
2663 obmc -= src_y*obmc_stride;
2664 b_h += src_y;
2665 src_y=0;
2666 }else if(src_y + b_h> h){
2667 b_h = h - src_y;
791e7b83 2668 }
620ab797 2669
ff158dc9 2670 if(b_w<=0 || b_h<=0) return;
155ec6ed 2671
cc884a35 2672assert(src_stride > 2*MB_SIZE + 5);
ff158dc9 2673 dst += src_x + src_y*dst_stride;
715a97f0 2674 dst8+= src_x + src_y*src_stride;
ff158dc9
MN
2675// src += src_x + src_y*src_stride;
2676
cc884a35
MN
2677 ptmp= tmp + 3*tmp_step;
2678 block[0]= ptmp;
2679 ptmp+=tmp_step;
ff158dc9
MN
2680 pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2681
2682 if(same_block(lt, rt)){
2683 block[1]= block[0];
791e7b83 2684 }else{
cc884a35
MN
2685 block[1]= ptmp;
2686 ptmp+=tmp_step;
ff158dc9
MN
2687 pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2688 }
2689
2690 if(same_block(lt, lb)){
2691 block[2]= block[0];
2692 }else if(same_block(rt, lb)){
2693 block[2]= block[1];
2694 }else{
cc884a35
MN
2695 block[2]= ptmp;
2696 ptmp+=tmp_step;
ff158dc9
MN
2697 pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2698 }
791e7b83 2699
ff158dc9
MN
2700 if(same_block(lt, rb) ){
2701 block[3]= block[0];
2702 }else if(same_block(rt, rb)){
2703 block[3]= block[1];
2704 }else if(same_block(lb, rb)){
2705 block[3]= block[2];
2706 }else{
cc884a35 2707 block[3]= ptmp;
ff158dc9
MN
2708 pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2709 }
2710#if 0
2711 for(y=0; y<b_h; y++){
2712 for(x=0; x<b_w; x++){
2713 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2714 if(add) dst[x + y*dst_stride] += v;
2715 else dst[x + y*dst_stride] -= v;
2716 }
2717 }
2718 for(y=0; y<b_h; y++){
2719 uint8_t *obmc2= obmc + (obmc_stride>>1);
2720 for(x=0; x<b_w; x++){
2721 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2722 if(add) dst[x + y*dst_stride] += v;
2723 else dst[x + y*dst_stride] -= v;
2724 }
2725 }
2726 for(y=0; y<b_h; y++){
2727 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2728 for(x=0; x<b_w; x++){
2729 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2730 if(add) dst[x + y*dst_stride] += v;
2731 else dst[x + y*dst_stride] -= v;
2732 }
2733 }
2734 for(y=0; y<b_h; y++){
2735 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2736 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2737 for(x=0; x<b_w; x++){
2738 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2739 if(add) dst[x + y*dst_stride] += v;
2740 else dst[x + y*dst_stride] -= v;
2741 }
2742 }
2743#else
2744 for(y=0; y<b_h; y++){
2745 //FIXME ugly missue of obmc_stride
2746 uint8_t *obmc1= obmc + y*obmc_stride;
2747 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2748 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2749 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2750 for(x=0; x<b_w; x++){
2751 int v= obmc1[x] * block[3][x + y*src_stride]
2752 +obmc2[x] * block[2][x + y*src_stride]
2753 +obmc3[x] * block[1][x + y*src_stride]
2754 +obmc4[x] * block[0][x + y*src_stride];
715a97f0
MN
2755
2756 v <<= 8 - LOG2_OBMC_MAX;
034aff03
MN
2757 if(FRAC_BITS != 8){
2758 v += 1<<(7 - FRAC_BITS);
2759 v >>= 8 - FRAC_BITS;
2760 }
715a97f0
MN
2761 if(add){
2762 v += dst[x + y*dst_stride];
2763 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2764 if(v&(~255)) v= ~(v>>31);
2765 dst8[x + y*src_stride] = v;
2766 }else{
2767 dst[x + y*dst_stride] -= v;
2768 }
791e7b83
MN
2769 }
2770 }
ff158dc9 2771#endif
791e7b83
MN
2772}
2773
a0d1931c
Y
2774static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2775 Plane *p= &s->plane[plane_index];
2776 const int mb_w= s->b_width << s->block_max_depth;
2777 const int mb_h= s->b_height << s->block_max_depth;
2778 int x, y, mb_x;
2779 int block_size = MB_SIZE >> s->block_max_depth;
2780 int block_w = plane_index ? block_size/2 : block_size;
2781 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2782 int obmc_stride= plane_index ? block_size : 2*block_size;
2783 int ref_stride= s->current_picture.linesize[plane_index];
2784 uint8_t *ref = s->last_picture.data[plane_index];
2785 uint8_t *dst8= s->current_picture.data[plane_index];
2786 int w= p->width;
2787 int h= p->height;
2788 START_TIMER
2789
2790 if(s->keyframe || (s->avctx->debug&512)){
2791 if(mb_y==mb_h)
2792 return;
2793
2794 if(add){
86e59cc0 2795 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
a0d1931c
Y
2796 {
2797// DWTELEM * line = slice_buffer_get_line(sb, y);
2798 DWTELEM * line = sb->line[y];
2799 for(x=0; x<w; x++)
2800 {
2801// int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2802 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2803 v >>= FRAC_BITS;
2804 if(v&(~255)) v= ~(v>>31);
2805 dst8[x + y*ref_stride]= v;
2806 }
2807 }
2808 }else{
86e59cc0 2809 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
a0d1931c
Y
2810 {
2811// DWTELEM * line = slice_buffer_get_line(sb, y);
2812 DWTELEM * line = sb->line[y];
2813 for(x=0; x<w; x++)
2814 {
2815 line[x] -= 128 << FRAC_BITS;
2816// buf[x + y*w]-= 128<<FRAC_BITS;
2817 }
2818 }
2819 }
2820
2821 return;
2822 }
2823
2824 for(mb_x=0; mb_x<=mb_w; mb_x++){
2825 START_TIMER
2826
2827 add_yblock_buffered(s, sb, old_buffer, dst8, ref, obmc,
2828 block_w*mb_x - block_w/2,
2829 block_w*mb_y - block_w/2,
2830 block_w, block_w,
2831 w, h,
2832 w, ref_stride, obmc_stride,
2833 mb_x - 1, mb_y - 1,
2834 add, plane_index);
2835
2836 STOP_TIMER("add_yblock")
2837 }
2838
2839 STOP_TIMER("predict_slice")
2840}
2841
f9e6ebf7 2842static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
791e7b83 2843 Plane *p= &s->plane[plane_index];
155ec6ed
MN
2844 const int mb_w= s->b_width << s->block_max_depth;
2845 const int mb_h= s->b_height << s->block_max_depth;
f9e6ebf7 2846 int x, y, mb_x;
155ec6ed
MN
2847 int block_size = MB_SIZE >> s->block_max_depth;
2848 int block_w = plane_index ? block_size/2 : block_size;
ff158dc9 2849 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
155ec6ed 2850 int obmc_stride= plane_index ? block_size : 2*block_size;
715a97f0 2851 int ref_stride= s->current_picture.linesize[plane_index];
791e7b83 2852 uint8_t *ref = s->last_picture.data[plane_index];
715a97f0 2853 uint8_t *dst8= s->current_picture.data[plane_index];
791e7b83
MN
2854 int w= p->width;
2855 int h= p->height;
fff6d4ea 2856 START_TIMER
791e7b83 2857
ff158dc9 2858 if(s->keyframe || (s->avctx->debug&512)){
f9e6ebf7
LM
2859 if(mb_y==mb_h)
2860 return;
2861
715a97f0 2862 if(add){
86e59cc0 2863 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
715a97f0
MN
2864 for(x=0; x<w; x++){
2865 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2866 v >>= FRAC_BITS;
2867 if(v&(~255)) v= ~(v>>31);
2868 dst8[x + y*ref_stride]= v;
2869 }
2870 }
2871 }else{
86e59cc0 2872 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
715a97f0
MN
2873 for(x=0; x<w; x++){
2874 buf[x + y*w]-= 128<<FRAC_BITS;
2875 }
ff158dc9 2876 }
791e7b83 2877 }
ff158dc9
MN
2878
2879 return;
791e7b83
MN
2880 }
2881
ff158dc9 2882 for(mb_x=0; mb_x<=mb_w; mb_x++){
fff6d4ea 2883 START_TIMER
ff158dc9 2884
715a97f0 2885 add_yblock(s, buf, dst8, ref, obmc,
ff158dc9 2886 block_w*mb_x - block_w/2,
791e7b83 2887 block_w*mb_y - block_w/2,
ff158dc9 2888 block_w, block_w,
791e7b83 2889 w, h,
ff158dc9
MN
2890 w, ref_stride, obmc_stride,
2891 mb_x - 1, mb_y - 1,
2892 add, plane_index);
2893
2894 STOP_TIMER("add_yblock")
791e7b83 2895 }
fff6d4ea 2896
f9e6ebf7
LM
2897 STOP_TIMER("predict_slice")
2898}
2899
2900static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2901 const int mb_h= s->b_height << s->block_max_depth;
2902 int mb_y;
2903 for(mb_y=0; mb_y<=mb_h; mb_y++)
2904 predict_slice(s, buf, plane_index, add, mb_y);
791e7b83
MN
2905}
2906
2907static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
2908 const int level= b->level;
2909 const int w= b->width;
2910 const int h= b->height;
c97de57c
MN
2911 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
2912 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
da66b631 2913 int x,y, thres1, thres2;
62ab0b78 2914// START_TIMER
791e7b83 2915
93fbdb5a
MN
2916 if(s->qlog == LOSSLESS_QLOG) return;
2917
791e7b83 2918 bias= bias ? 0 : (3*qmul)>>3;
da66b631
MN
2919 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
2920 thres2= 2*thres1;
791e7b83
MN
2921
2922 if(!bias){
2923 for(y=0; y<h; y++){
2924 for(x=0; x<w; x++){
da66b631
MN
2925 int i= src[x + y*stride];
2926
2927 if((unsigned)(i+thres1) > thres2){
2928 if(i>=0){
2929 i<<= QEXPSHIFT;
2930 i/= qmul; //FIXME optimize
2931 src[x + y*stride]= i;
2932 }else{
2933 i= -i;
2934 i<<= QEXPSHIFT;
2935 i/= qmul; //FIXME optimize
2936 src[x + y*stride]= -i;
2937 }
2938 }else
2939 src[x + y*stride]= 0;
791e7b83
MN
2940 }
2941 }
2942 }else{
2943 for(y=0; y<h; y++){
2944 for(x=0; x<w; x++){
2945 int i= src[x + y*stride];
2946
da66b631
MN
2947 if((unsigned)(i+thres1) > thres2){
2948 if(i>=0){
2949 i<<= QEXPSHIFT;
2950 i= (i + bias) / qmul; //FIXME optimize
2951 src[x + y*stride]= i;
2952 }else{
2953 i= -i;
2954 i<<= QEXPSHIFT;
2955 i= (i + bias) / qmul; //FIXME optimize
2956 src[x + y*stride]= -i;
2957 }
2958 }else
2959 src[x + y*stride]= 0;
791e7b83
MN
2960 }
2961 }
2962 }
da66b631
MN
2963 if(level+1 == s->spatial_decomposition_count){
2964// STOP_TIMER("quantize")
2965 }
791e7b83
MN
2966}
2967
66b32bf2 2968static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
a0d1931c 2969 const int w= b->width;
c97de57c
MN
2970 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
2971 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
a0d1931c
Y
2972 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2973 int x,y;
2974 START_TIMER
2975
2976 if(s->qlog == LOSSLESS_QLOG) return;
2977
66b32bf2 2978 for(y=start_y; y<end_y; y++){
a0d1931c
Y
2979// DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
2980 DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
2981 for(x=0; x<w; x++){
2982 int i= line[x];
2983 if(i<0){
2984 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2985 }else if(i>0){
2986 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
2987 }
2988 }
2989 }
2990 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
2991 STOP_TIMER("dquant")
2992 }
2993}
2994
791e7b83 2995static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
791e7b83
MN
2996 const int w= b->width;
2997 const int h= b->height;
c97de57c
MN
2998 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
2999 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
791e7b83
MN
3000 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3001 int x,y;
ea7d9cd4 3002 START_TIMER
791e7b83 3003
93fbdb5a
MN
3004 if(s->qlog == LOSSLESS_QLOG) return;
3005
791e7b83
MN
3006 for(y=0; y<h; y++){
3007 for(x=0; x<w; x++){
3008 int i= src[x + y*stride];
3009 if(i<0){
3010 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3011 }else if(i>0){
3012 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
3013 }
3014 }
3015 }
ea7d9cd4
MN
3016 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3017 STOP_TIMER("dquant")
3018 }
791e7b83
MN
3019}
3020
3021static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3022 const int w= b->width;
3023 const int h= b->height;
3024 int x,y;
3025
3026 for(y=h-1; y>=0; y--){
3027 for(x=w-1; x>=0; x--){
3028 int i= x + y*stride;
3029
3030 if(x){
3031 if(use_median){
3032 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3033 else src[i] -= src[i - 1];
3034 }else{
3035 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3036 else src[i] -= src[i - 1];
3037 }
3038 }else{
3039 if(y) src[i] -= src[i - stride];
3040 }
3041 }
3042 }
3043}
3044
66b32bf2 3045static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
a0d1931c 3046 const int w= b->width;
a0d1931c
Y
3047 int x,y;
3048
3049// START_TIMER
3050
3051 DWTELEM * line;
3052 DWTELEM * prev;
3053
66b32bf2
Y
3054 if (start_y != 0)
3055 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3056
3057 for(y=start_y; y<end_y; y++){
a0d1931c
Y
3058 prev = line;
3059// line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3060 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3061 for(x=0; x<w; x++){
3062 if(x){
3063 if(use_median){
3064 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3065 else line[x] += line[x - 1];
3066 }else{
3067 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3068 else line[x] += line[x - 1];
3069 }
3070 }else{
3071 if(y) line[x] += prev[x];
3072 }
3073 }
3074 }
3075
3076// STOP_TIMER("correlate")
3077}
3078
791e7b83
MN
3079static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3080 const int w= b->width;
3081 const int h= b->height;
3082 int x,y;
3083
3084 for(y=0; y<h; y++){
3085 for(x=0; x<w; x++){
3086 int i= x + y*stride;
3087
3088 if(x){
3089 if(use_median){
3090 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3091 else src[i] += src[i - 1];
3092 }else{
3093 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3094 else src[i] += src[i - 1];
3095 }
3096 }else{
3097 if(y) src[i] += src[i - stride];
3098 }
3099 }
3100 }
3101}
3102
3103static void encode_header(SnowContext *s){
3104 int plane_index, level, orientation;
28869757
MN
3105 uint8_t kstate[32];
3106
3107 memset(kstate, MID_STATE, sizeof(kstate));
791e7b83 3108
28869757 3109 put_rac(&s->c, kstate, s->keyframe);
19aa028d
MN
3110 if(s->keyframe || s->always_reset)
3111 reset_contexts(s);
791e7b83
MN
3112 if(s->keyframe){
3113 put_symbol(&s->c, s->header_state, s->version, 0);
28869757 3114 put_rac(&s->c, s->header_state, s->always_reset);
791e7b83
MN
3115 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3116 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3117 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3118 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
791e7b83
MN
3119 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3120 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
28869757
MN
3121 put_rac(&s->c, s->header_state, s->spatial_scalability);
3122// put_rac(&s->c, s->header_state, s->rate_scalability);
791e7b83
MN
3123
3124 for(plane_index=0; plane_index<2; plane_index++){
3125 for(level=0; level<s->spatial_decomposition_count; level++){
3126 for(orientation=level ? 1:0; orientation<4; orientation++){
3127 if(orientation==2) continue;
3128 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3129 }
3130 }
3131 }
3132 }
3133 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
3134 put_symbol(&s->c, s->header_state, s->qlog, 1);
3135 put_symbol(&s->c, s->header_state, s->mv_scale, 0);
3136 put_symbol(&s->c, s->header_state, s->qbias, 1);
155ec6ed 3137 put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
791e7b83
MN
3138}
3139
3140static int decode_header(SnowContext *s){
3141 int plane_index, level, orientation;
28869757
MN
3142 uint8_t kstate[32];
3143
3144 memset(kstate, MID_STATE, sizeof(kstate));
791e7b83 3145
28869757 3146 s->keyframe= get_rac(&s->c, kstate);
19aa028d
MN
3147 if(s->keyframe || s->always_reset)
3148 reset_contexts(s);
791e7b83
MN
3149 if(s->keyframe){
3150 s->version= get_symbol(&s->c, s->header_state, 0);
3151 if(s->version>0){
3152 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3153 return -1;
3154 }
28869757 3155 s->always_reset= get_rac(&s->c, s->header_state);
791e7b83
MN
3156 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3157 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3158 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3159 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
791e7b83
MN
3160 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3161 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
28869757
MN
3162 s->spatial_scalability= get_rac(&s->c, s->header_state);
3163// s->rate_scalability= get_rac(&s->c, s->header_state);
791e7b83
MN
3164
3165 for(plane_index=0; plane_index<3; plane_index++){
3166 for(level=0; level<s->spatial_decomposition_count; level++){
3167 for(orientation=level ? 1:0; orientation<4; orientation++){
3168 int q;
3169 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3170 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3171 else q= get_symbol(&s->c, s->header_state, 1);
3172 s->plane[plane_index].band[level][orientation].qlog= q;
3173 }
3174 }
3175 }
3176 }
3177
3178 s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3179 if(s->spatial_decomposition_type > 2){
3180 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3181 return -1;
3182 }
3183
3184 s->qlog= get_symbol(&s->c, s->header_state, 1);
3185 s->mv_scale= get_symbol(&s->c, s->header_state, 0);
3186 s->qbias= get_symbol(&s->c, s->header_state, 1);
155ec6ed 3187 s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
4393e639
MN
3188 if(s->block_max_depth > 1){
3189 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3190 s->block_max_depth= 0;
3191 return -1;
3192 }
791e7b83
MN
3193
3194 return 0;
3195}
3196
c97de57c
MN
3197static void init_qexp(){
3198 int i;
3199 double v=128;
3200
3201 for(i=0; i<QROOT; i++){
3202 qexp[i]= lrintf(v);
3203 v *= pow(2, 1.0 / QROOT);
3204 }
3205}
3206
791e7b83
MN
3207static int common_init(AVCodecContext *avctx){
3208 SnowContext *s = avctx->priv_data;
3209 int width, height;
3210 int level, orientation, plane_index, dec;
3211
3212 s->avctx= avctx;
3213
3214 dsputil_init(&s->dsp, avctx);
3215
3216#define mcf(dx,dy)\
3217 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3218 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
d92b5807
MN
3219 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3220 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3221 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3222 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
791e7b83
MN
3223
3224 mcf( 0, 0)
3225 mcf( 4, 0)
3226 mcf( 8, 0)
3227 mcf(12, 0)
3228 mcf( 0, 4)
3229 mcf( 4, 4)
3230 mcf( 8, 4)
3231 mcf(12, 4)
3232 mcf( 0, 8)
3233 mcf( 4, 8)
3234 mcf( 8, 8)
3235 mcf(12, 8)
3236 mcf( 0,12)
3237 mcf( 4,12)
3238 mcf( 8,12)
3239 mcf(12,12)
3240
3241#define mcfh(dx,dy)\
3242 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3243 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
d92b5807
MN
3244 mc_block_hpel ## dx ## dy ## 16;\
3245 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3246 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3247 mc_block_hpel ## dx ## dy ## 8;
791e7b83
MN
3248
3249 mcfh(0, 0)
3250 mcfh(8, 0)
3251 mcfh(0, 8)
3252 mcfh(8, 8)
c97de57c
MN
3253
3254 if(!qexp[0])
3255 init_qexp();
3256
791e7b83
MN
3257 dec= s->spatial_decomposition_count= 5;
3258 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3259
3260 s->chroma_h_shift= 1; //FIXME XXX
3261 s->chroma_v_shift= 1;
3262
3263// dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3264
155ec6ed
MN
3265 width= s->avctx->width;
3266 height= s->avctx->height;
3267
3268 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
791e7b83
MN
3269
3270 s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
155ec6ed 3271 s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
791e7b83
MN
3272
3273 for(plane_index=0; plane_index<3; plane_index++){
3274 int w= s->avctx->width;
3275 int h= s->avctx->height;
3276
3277 if(plane_index){
3278 w>>= s->chroma_h_shift;
3279 h>>= s->chroma_v_shift;
3280 }
3281 s->plane[plane_index].width = w;
3282 s->plane[plane_index].height= h;
3bb9f096 3283//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
791e7b83
MN
3284 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3285 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3286 SubBand *b= &s->plane[plane_index].band[level][orientation];
3287
3288 b->buf= s->spatial_dwt_buffer;
3289 b->level= level;
3290 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3291 b->width = (w + !(orientation&1))>>1;
3292 b->height= (h + !(orientation>1))>>1;
3293
a0d1931c
Y
3294 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3295 b->buf_x_offset = 0;
3296 b->buf_y_offset = 0;
3297
3298 if(orientation&1){
3299 b->buf += (w+1)>>1;
3300 b->buf_x_offset = (w+1)>>1;
3301 }
3302 if(orientation>1){
3303 b->buf += b->stride>>1;
3304 b->buf_y_offset = b->stride_line >> 1;
3305 }
791e7b83
MN
3306
3307 if(level)
3308 b->parent= &s->plane[plane_index].band[level-1][orientation];
a0d1931c 3309 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
791e7b83
MN
3310 }
3311 w= (w+1)>>1;
3312 h= (h+1)>>1;
3313 }
3314 }
3315
791e7b83
MN
3316 reset_contexts(s);
3317/*
3318 width= s->width= avctx->width;
3319 height= s->height= avctx->height;
3320
3321 assert(width && height);
3322*/
3323 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3324
3325 return 0;
3326}
3327
3328
3329static void calculate_vissual_weight(SnowContext *s, Plane *p){
3330 int width = p->width;
3331 int height= p->height;
39c61bbb 3332 int level, orientation, x, y;
791e7b83
MN
3333
3334 for(level=0; level<s->spatial_decomposition_count; level++){
3335 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3336 SubBand *b= &p->band[level][orientation];
3337 DWTELEM *buf= b->buf;
3338 int64_t error=0;
3339
3340 memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
3341 buf[b->width/2 + b->height/2*b->stride]= 256*256;
46c281e8 3342 ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
791e7b83
MN
3343 for(y=0; y<height; y++){
3344 for(x=0; x<width; x++){
3345 int64_t d= s->spatial_dwt_buffer[x + y*width];
3346 error += d*d;
3347 }
3348 }
3349
3350 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
46c281e8 3351// av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
791e7b83
MN
3352 }
3353 }
3354}
3355
3356static int encode_init(AVCodecContext *avctx)
3357{
3358 SnowContext *s = avctx->priv_data;
39c61bbb 3359 int plane_index;
791e7b83 3360
9cd81798 3361 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
160d679c 3362 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
9cd81798 3363 "use vstrict=-2 / -strict -2 to use it anyway\n");
2ff9ff5b
MN
3364 return -1;
3365 }
3366
791e7b83 3367 common_init(avctx);
155ec6ed 3368 alloc_blocks(s);
791e7b83
MN
3369
3370 s->version=0;
3371
2cd34043
LM
3372 s->m.avctx = avctx;
3373 s->m.flags = avctx->flags;
3374 s->m.bit_rate= avctx->bit_rate;
3375
791e7b83
MN
3376 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3377 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3378 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
791e7b83
MN
3379 h263_encode_init(&s->m); //mv_penalty
3380
2cd34043
LM
3381 if(avctx->flags&CODEC_FLAG_PASS1){
3382 if(!avctx->stats_out)
3383 avctx->stats_out = av_mallocz(256);
3384 }
3385 if(avctx->flags&CODEC_FLAG_PASS2){
3386 if(ff_rate_control_init(&s->m) < 0)
3387 return -1;
3388 }
3389
791e7b83
MN
3390 for(plane_index=0; plane_index<3; plane_index++){
3391 calculate_vissual_weight(s, &s->plane[plane_index]);
3392 }
3393
3394
3395 avctx->coded_frame= &s->current_picture;
3396 switch(avctx->pix_fmt){
3397// case PIX_FMT_YUV444P:
3398// case PIX_FMT_YUV422P:
3399 case PIX_FMT_YUV420P:
3400 case PIX_FMT_GRAY8:
3401// case PIX_FMT_YUV411P:
3402// case PIX_FMT_YUV410P:
3403 s->colorspace_type= 0;
3404 break;
3405/* case PIX_FMT_RGBA32:
3406 s->colorspace= 1;
3407 break;*/
3408 default:
3409 av_log(avctx, AV_LOG_ERROR, "format not supported\n");
3410 return -1;
3411 }
3412// avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
3413 s->chroma_h_shift= 1;
3414 s->chroma_v_shift= 1;
3415 return 0;
3416}
3417
3418static int frame_start(SnowContext *s){
3419 AVFrame tmp;
64886072
MN
3420 int w= s->avctx->width; //FIXME round up to x16 ?
3421 int h= s->avctx->height;
791e7b83 3422
64886072
MN
3423 if(s->current_picture.data[0]){
3424 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
3425 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
3426 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
3427 }
3428
791e7b83
MN
3429 tmp= s->last_picture;
3430 s->last_picture= s->current_picture;
3431 s->current_picture= tmp;
3432
3433 s->current_picture.reference= 1;
3434 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
3435 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
3436 return -1;
3437 }
3438
3439 return 0;
3440}
3441
3442static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
3443 SnowContext *s = avctx->priv_data;
28869757 3444 RangeCoder * const c= &s->c;
791e7b83
MN
3445 AVFrame *pict = data;
3446 const int width= s->avctx->width;
3447 const int height= s->avctx->height;
39c61bbb 3448 int level, orientation, plane_index;
791e7b83 3449
28869757
MN
3450 ff_init_range_encoder(c, buf, buf_size);
3451 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
791e7b83
MN
3452
3453 s->input_picture = *pict;
3454
2cd34043
LM
3455 if(avctx->flags&CODEC_FLAG_PASS2){
3456 s->m.pict_type =
3457 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
3458 s->keyframe= pict->pict_type==FF_I_TYPE;
3459 s->m.picture_number= avctx->frame_number;
3460 pict->quality= ff_rate_estimate_qscale(&s->m);
3461 }else{
3462 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
3463 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
3464 }
791e7b83 3465
93fbdb5a
MN
3466 if(pict->quality){
3467 s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
3468 //<64 >60
c97de57c 3469 s->qlog += 61*QROOT/8;
93fbdb5a
MN
3470 }else{
3471 s->qlog= LOSSLESS_QLOG;
3472 }
791e7b83 3473
791e7b83 3474 frame_start(s);
19aa028d 3475 s->current_picture.key_frame= s->keyframe;
791e7b83 3476
2cd34043 3477 s->m.current_picture_ptr= &s->m.current_picture;
791e7b83
MN
3478 if(pict->pict_type == P_TYPE){
3479 int block_width = (width +15)>>4;
3480 int block_height= (height+15)>>4;
3481 int stride= s->current_picture.linesize[0];
791e7b83
MN
3482
3483 assert(s->current_picture.data[0]);
3484 assert(s->last_picture.data[0]);
3485
3486 s->m.avctx= s->avctx;
3487 s->m.current_picture.data[0]= s->current_picture.data[0];
3488 s->m. last_picture.data[0]= s-> last_picture.data[0];
3489 s->m. new_picture.data[0]= s-> input_picture.data[0];
791e7b83
MN
3490 s->m. last_picture_ptr= &s->m. last_picture;
3491 s->m.linesize=
3492 s->m. last_picture.linesize[0]=
3493 s->m. new_picture.linesize[0]=
3494 s->m.current_picture.linesize[0]= stride;
155ec6ed 3495 s->m.uvlinesize= s->current_picture.linesize[1];
791e7b83
MN
3496 s->m.width = width;
3497 s->m.height= height;
3498 s->m.mb_width = block_width;
3499 s->m.mb_height= block_height;
3500 s->m.mb_stride= s->m.mb_width+1;
3501 s->m.b8_stride= 2*s->m.mb_width+1;
3502 s->m.f_code=1;
3503 s->m.pict_type= pict->pict_type;
3504 s->m.me_method= s->avctx->me_method;
3505 s->m.me.scene_change_score=0;
3506 s->m.flags= s->avctx->flags;
3507 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
3508 s->m.out_format= FMT_H263;
3509 s->m.unrestricted_mv= 1;
3510
155ec6ed 3511 s->lambda = s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
791e7b83 3512 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
155ec6ed 3513 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
791e7b83 3514
791e7b83
MN
3515 s->m.dsp= s->dsp; //move
3516 ff_init_me(&s->m);
791e7b83 3517 }
791e7b83 3518
155ec6ed
MN
3519redo_frame:
3520
791e7b83
MN
3521 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
3522
3523 encode_header(s);
2cd34043 3524 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
155ec6ed 3525 encode_blocks(s);
2cd34043 3526 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
155ec6ed 3527
791e7b83
MN
3528 for(plane_index=0; plane_index<3; plane_index++){
3529 Plane *p= &s->plane[plane_index];
3530 int w= p->width;
3531 int h= p->height;
3532 int x, y;
39c61bbb 3533// int bits= put_bits_count(&s->c.pb);
791e7b83
MN
3534
3535 //FIXME optimize
791e7b83
MN
3536 if(pict->data[plane_index]) //FIXME gray hack
3537 for(y=0; y<h; y++){
3538 for(x=0; x<w; x++){
034aff03 3539 s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
791e7b83
MN
3540 }
3541 }
3542 predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
155ec6ed
MN
3543
3544 if( plane_index==0
3545 && pict->pict_type == P_TYPE
3546 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
28869757
MN
3547 ff_init_range_encoder(c, buf, buf_size);
3548 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
155ec6ed
MN
3549 pict->pict_type= FF_I_TYPE;
3550 s->keyframe=1;
3551 reset_contexts(s);
3552 goto redo_frame;
3553 }
3554
93fbdb5a
MN
3555 if(s->qlog == LOSSLESS_QLOG){
3556 for(y=0; y<h; y++){
3557 for(x=0; x<w; x++){
3cff4572 3558 s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
93fbdb5a
MN
3559 }
3560 }
3561 }
791e7b83 3562
46c281e8 3563 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
93fbdb5a 3564
791e7b83
MN
3565 for(level=0; level<s->spatial_decomposition_count; level++){
3566 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3567 SubBand *b= &p->band[level][orientation];
3568
3569 quantize(s, b, b->buf, b->stride, s->qbias);
3570 if(orientation==0)
3571 decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
3572 encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
3573 assert(b->parent==NULL || b->parent->stride == b->stride*2);
3574 if(orientation==0)
3575 correlate(s, b, b->buf, b->stride, 1, 0);
3576 }
3577 }
3578// av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
3579
3580 for(level=0; level<s->spatial_decomposition_count; level++){
3581 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3582 SubBand *b= &p->band[level][orientation];
3583
3584 dequantize(s, b, b->buf, b->stride);
3585 }
3586 }
93fbdb5a 3587
46c281e8 3588 ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
93fbdb5a
MN
3589 if(s->qlog == LOSSLESS_QLOG){
3590 for(y=0; y<h; y++){
3591 for(x=0; x<w; x++){
034aff03 3592 s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
93fbdb5a
MN
3593 }
3594 }
3595 }
715a97f0 3596{START_TIMER
791e7b83 3597 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
715a97f0 3598STOP_TIMER("pred-conv")}
791e7b83
MN
3599 if(s->avctx->flags&CODEC_FLAG_PSNR){
3600 int64_t error= 0;
3601
3602 if(pict->data[plane_index]) //FIXME gray hack
3603 for(y=0; y<h; y++){
3604 for(x=0; x<w; x++){
93fbdb5a 3605 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
791e7b83
MN
3606 error += d*d;
3607 }
3608 }
791e7b83 3609 s->avctx->error[plane_index] += error;
bd368b56 3610 s->current_picture.error[plane_index] = error;
791e7b83
MN
3611 }
3612 }
3613
3614 if(s->last_picture.data[0])
3615 avctx->release_buffer(avctx, &s->last_picture);
3616
2cd34043
LM
3617 s->current_picture.coded_picture_number = avctx->frame_number;
3618 s->current_picture.pict_type = pict->pict_type;
3619 s->current_picture.quality = pict->quality;
3620 if(avctx->flags&CODEC_FLAG_PASS1){
3621 s->m.p_tex_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits - s->m.mv_bits;
3622 s->m.current_picture.display_picture_number =
3623 s->m.current_picture.coded_picture_number = avctx->frame_number;
3624 s->m.pict_type = pict->pict_type;
3625 s->m.current_picture.quality = pict->quality;
3626 ff_write_pass1_stats(&s->m);
3627 }
3628 if(avctx->flags&CODEC_FLAG_PASS2){
3629 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
3630 }
3631
791e7b83
MN
3632 emms_c();
3633
28869757 3634 return ff_rac_terminate(c);
791e7b83
MN
3635}
3636
3637static void common_end(SnowContext *s){
7b49c309
MN
3638 int plane_index, level, orientation;
3639
791e7b83 3640 av_freep(&s->spatial_dwt_buffer);
791e7b83
MN
3641
3642 av_freep(&s->m.me.scratchpad);
3643 av_freep(&s->m.me.map);
3644 av_freep(&s->m.me.score_map);
155ec6ed
MN
3645
3646 av_freep(&s->block);
7b49c309
MN
3647
3648 for(plane_index=0; plane_index<3; plane_index++){
3649 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3650 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3651 SubBand *b= &s->plane[plane_index].band[level][orientation];
3652
a0d1931c 3653 av_freep(&b->x_coeff);
7b49c309
MN
3654 }
3655 }
3656 }