Move Darwin shared library build peculiarities to configure.
[libav.git] / libavcodec / snow.c
CommitLineData
791e7b83
MN
1/*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#include "avcodec.h"
20#include "common.h"
21#include "dsputil.h"
28869757
MN
22
23#include "rangecoder.h"
24#define MID_STATE 128
791e7b83
MN
25
26#include "mpegvideo.h"
27
28#undef NDEBUG
29#include <assert.h>
30
31#define MAX_DECOMPOSITIONS 8
32#define MAX_PLANES 4
33#define DWTELEM int
a0a74ad9 34#define QSHIFT 5
c97de57c 35#define QROOT (1<<QSHIFT)
93fbdb5a 36#define LOSSLESS_QLOG -128
034aff03 37#define FRAC_BITS 8
791e7b83
MN
38
39static const int8_t quant3[256]={
40 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
47 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
48-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
53-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
54-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
55-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
56};
57static const int8_t quant3b[256]={
58 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
64 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
65 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
66-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
71-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
72-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
73-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
74};
538a3841
MN
75static const int8_t quant3bA[256]={
76 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
87 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
88 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
89 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
90 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
91 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
92};
791e7b83
MN
93static const int8_t quant5[256]={
94 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
97 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
98 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
99 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
100 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
101 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
102-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
104-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
106-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
107-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
108-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
109-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
110};
111static const int8_t quant7[256]={
112 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
113 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
114 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
115 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
116 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
117 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
118 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
119 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
120-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
121-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
122-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
123-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
124-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
125-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
126-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
127-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
128};
129static const int8_t quant9[256]={
130 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
131 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
133 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
134 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
135 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
136 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
137 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
138-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
139-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
140-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
142-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
143-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
144-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
145-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
146};
147static const int8_t quant11[256]={
148 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
149 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
150 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
151 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
152 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
153 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
154 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
155 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
156-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
157-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
159-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
160-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
161-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
162-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
163-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
164};
165static const int8_t quant13[256]={
166 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
167 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
168 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
169 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
170 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
171 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
172 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
173 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
174-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
175-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
176-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
177-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
178-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
179-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
180-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
181-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
182};
183
715a97f0
MN
184#define LOG2_OBMC_MAX 6
185#define OBMC_MAX (1<<(LOG2_OBMC_MAX))
791e7b83
MN
186#if 0 //64*cubic
187static const uint8_t obmc32[1024]={
188 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
190 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
191 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
192 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
193 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
194 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
195 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
196 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
197 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
198 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
199 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
200 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
201 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
202 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
203 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
204 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
205 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
206 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
207 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
208 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
209 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
210 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
211 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
212 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
213 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
214 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
215 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
216 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
217 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
218 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
219 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
220//error:0.000022
221};
222static const uint8_t obmc16[256]={
223 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
224 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
225 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
226 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
227 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
228 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
229 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
230 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
231 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
232 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
233 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
234 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
235 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
236 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
237 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
238 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
239//error:0.000033
240};
241#elif 1 // 64*linear
242static const uint8_t obmc32[1024]={
243 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
244 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
245 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
246 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
247 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
248 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
249 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
250 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
251 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
252 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
253 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
254 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
255 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
256 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
257 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
258 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
259 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
260 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
261 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
262 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
263 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
264 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
265 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
266 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
267 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
268 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
269 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
270 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
271 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
272 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
273 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
274 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
275 //error:0.000020
276};
277static const uint8_t obmc16[256]={
278 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
279 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
280 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
281 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
282 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
283 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
284 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
285 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
286 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
287 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
288 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
289 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
290 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
291 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
292 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
293 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
294//error:0.000015
295};
296#else //64*cos
297static const uint8_t obmc32[1024]={
298 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
299 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
300 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
301 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
302 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
303 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
304 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
305 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
306 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
307 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
308 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
309 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
310 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
311 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
312 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
313 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
314 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
315 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
316 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
317 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
318 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
319 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
320 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
321 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
322 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
323 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
324 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
325 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
326 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
327 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
328 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
329 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
330//error:0.000022
331};
332static const uint8_t obmc16[256]={
333 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
334 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
335 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
336 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
337 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
338 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
339 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
340 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
341 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
342 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
343 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
344 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
345 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
346 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
347 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
348 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
349//error:0.000022
350};
351#endif
352
155ec6ed
MN
353//linear *64
354static const uint8_t obmc8[64]={
355 1, 3, 5, 7, 7, 5, 3, 1,
356 3, 9,15,21,21,15, 9, 3,
357 5,15,25,35,35,25,15, 5,
358 7,21,35,49,49,35,21, 7,
359 7,21,35,49,49,35,21, 7,
360 5,15,25,35,35,25,15, 5,
361 3, 9,15,21,21,15, 9, 3,
362 1, 3, 5, 7, 7, 5, 3, 1,
363//error:0.000000
364};
365
366//linear *64
367static const uint8_t obmc4[16]={
368 4,12,12, 4,
36912,36,36,12,
37012,36,36,12,
371 4,12,12, 4,
372//error:0.000000
373};
374
375static const uint8_t *obmc_tab[4]={
376 obmc32, obmc16, obmc8, obmc4
377};
378
379typedef struct BlockNode{
380 int16_t mx;
381 int16_t my;
382 uint8_t color[3];
383 uint8_t type;
384//#define TYPE_SPLIT 1
385#define BLOCK_INTRA 1
51d6a3cf 386#define BLOCK_OPT 2
155ec6ed
MN
387//#define TYPE_NOCOLOR 4
388 uint8_t level; //FIXME merge into type?
389}BlockNode;
390
51d6a3cf
MN
391static const BlockNode null_block= { //FIXME add border maybe
392 .color= {128,128,128},
393 .mx= 0,
394 .my= 0,
395 .type= 0,
396 .level= 0,
397};
398
155ec6ed
MN
399#define LOG2_MB_SIZE 4
400#define MB_SIZE (1<<LOG2_MB_SIZE)
401
a0d1931c
Y
402typedef struct x_and_coeff{
403 int16_t x;
538a3841 404 uint16_t coeff;
a0d1931c
Y
405} x_and_coeff;
406
791e7b83
MN
407typedef struct SubBand{
408 int level;
409 int stride;
410 int width;
411 int height;
412 int qlog; ///< log(qscale)/log[2^(1/6)]
413 DWTELEM *buf;
a0d1931c
Y
414 int buf_x_offset;
415 int buf_y_offset;
416 int stride_line; ///< Stride measured in lines, not pixels.
417 x_and_coeff * x_coeff;
791e7b83
MN
418 struct SubBand *parent;
419 uint8_t state[/*7*2*/ 7 + 512][32];
420}SubBand;
421
422typedef struct Plane{
423 int width;
424 int height;
425 SubBand band[MAX_DECOMPOSITIONS][4];
426}Plane;
427
a0d1931c
Y
428/** Used to minimize the amount of memory used in order to optimize cache performance. **/
429typedef struct {
430 DWTELEM * * line; ///< For use by idwt and predict_slices.
431 DWTELEM * * data_stack; ///< Used for internal purposes.
432 int data_stack_top;
433 int line_count;
434 int line_width;
435 int data_count;
436 DWTELEM * base_buffer; ///< Buffer that this structure is caching.
437} slice_buffer;
438
791e7b83
MN
439typedef struct SnowContext{
440// MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
441
442 AVCodecContext *avctx;
28869757 443 RangeCoder c;
791e7b83 444 DSPContext dsp;
51d6a3cf
MN
445 AVFrame new_picture;
446 AVFrame input_picture; ///< new_picture with the internal linesizes
791e7b83
MN
447 AVFrame current_picture;
448 AVFrame last_picture;
449 AVFrame mconly_picture;
450// uint8_t q_context[16];
451 uint8_t header_state[32];
155ec6ed 452 uint8_t block_state[128 + 32*128];
791e7b83 453 int keyframe;
19aa028d 454 int always_reset;
791e7b83
MN
455 int version;
456 int spatial_decomposition_type;
457 int temporal_decomposition_type;
458 int spatial_decomposition_count;
459 int temporal_decomposition_count;
460 DWTELEM *spatial_dwt_buffer;
791e7b83
MN
461 int colorspace_type;
462 int chroma_h_shift;
463 int chroma_v_shift;
464 int spatial_scalability;
465 int qlog;
155ec6ed
MN
466 int lambda;
467 int lambda2;
791e7b83
MN
468 int mv_scale;
469 int qbias;
470#define QBIAS_SHIFT 3
155ec6ed
MN
471 int b_width;
472 int b_height;
473 int block_max_depth;
791e7b83 474 Plane plane[MAX_PLANES];
155ec6ed 475 BlockNode *block;
51d6a3cf
MN
476#define ME_CACHE_SIZE 1024
477 int me_cache[ME_CACHE_SIZE];
478 int me_cache_generation;
a0d1931c 479 slice_buffer sb;
155ec6ed 480
791e7b83
MN
481 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
482}SnowContext;
483
f9e6ebf7
LM
484typedef struct {
485 DWTELEM *b0;
486 DWTELEM *b1;
487 DWTELEM *b2;
488 DWTELEM *b3;
489 int y;
490} dwt_compose_t;
491
a0d1931c
Y
492#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
493//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
494
51d6a3cf
MN
495static void iterative_me(SnowContext *s);
496
a0d1931c
Y
497static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
498{
499 int i;
115329f1 500
a0d1931c
Y
501 buf->base_buffer = base_buffer;
502 buf->line_count = line_count;
503 buf->line_width = line_width;
504 buf->data_count = max_allocated_lines;
505 buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
506 buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
115329f1 507
a0d1931c
Y
508 for (i = 0; i < max_allocated_lines; i++)
509 {
510 buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
511 }
115329f1 512
a0d1931c
Y
513 buf->data_stack_top = max_allocated_lines - 1;
514}
515
516static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
517{
a0d1931c
Y
518 int offset;
519 DWTELEM * buffer;
115329f1
DB
520
521// av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
522
a0d1931c
Y
523 assert(buf->data_stack_top >= 0);
524// assert(!buf->line[line]);
525 if (buf->line[line])
526 return buf->line[line];
115329f1 527
a0d1931c
Y
528 offset = buf->line_width * line;
529 buffer = buf->data_stack[buf->data_stack_top];
530 buf->data_stack_top--;
531 buf->line[line] = buffer;
115329f1 532
a0d1931c 533// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
115329f1 534
a0d1931c
Y
535 return buffer;
536}
537
538static void slice_buffer_release(slice_buffer * buf, int line)
539{
a0d1931c
Y
540 int offset;
541 DWTELEM * buffer;
542
543 assert(line >= 0 && line < buf->line_count);
544 assert(buf->line[line]);
545
546 offset = buf->line_width * line;
547 buffer = buf->line[line];
548 buf->data_stack_top++;
549 buf->data_stack[buf->data_stack_top] = buffer;
550 buf->line[line] = NULL;
115329f1 551
a0d1931c
Y
552// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
553}
554
555static void slice_buffer_flush(slice_buffer * buf)
556{
557 int i;
558 for (i = 0; i < buf->line_count; i++)
559 {
560 if (buf->line[i])
561 {
562// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
563 slice_buffer_release(buf, i);
564 }
565 }
566}
567
568static void slice_buffer_destroy(slice_buffer * buf)
569{
570 int i;
571 slice_buffer_flush(buf);
115329f1 572
a0d1931c
Y
573 for (i = buf->data_count - 1; i >= 0; i--)
574 {
575 assert(buf->data_stack[i]);
576 av_free(buf->data_stack[i]);
577 }
578 assert(buf->data_stack);
579 av_free(buf->data_stack);
580 assert(buf->line);
581 av_free(buf->line);
582}
583
bb270c08 584#ifdef __sgi
2554db9b 585// Avoid a name clash on SGI IRIX
bb270c08 586#undef qexp
2554db9b 587#endif
034aff03 588#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
c97de57c 589static uint8_t qexp[QROOT];
791e7b83
MN
590
/**
 * Fold v into the range [0, m] by reflecting it at 0 and at m until it
 * fits.
 *
 * The unsigned comparison treats negative v as out of range as well.
 */
static inline int mirror(int v, int m){
    const unsigned limit= (unsigned)m;

    for(;;){
        if((unsigned)v <= limit)
            return v;

        v= -v;          /* reflect at 0 */
        if(v < 0)
            v += 2*m;   /* v was above m: this completes the reflection at m */
    }
}
598
28869757 599static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
791e7b83
MN
600 int i;
601
602 if(v){
603 const int a= ABS(v);
604 const int e= av_log2(a);
605#if 1
115329f1 606 const int el= FFMIN(e, 10);
28869757 607 put_rac(c, state+0, 0);
791e7b83
MN
608
609 for(i=0; i<el; i++){
28869757 610 put_rac(c, state+1+i, 1); //1..10
791e7b83
MN
611 }
612 for(; i<e; i++){
28869757 613 put_rac(c, state+1+9, 1); //1..10
791e7b83 614 }
28869757 615 put_rac(c, state+1+FFMIN(i,9), 0);
791e7b83
MN
616
617 for(i=e-1; i>=el; i--){
28869757 618 put_rac(c, state+22+9, (a>>i)&1); //22..31
791e7b83
MN
619 }
620 for(; i>=0; i--){
28869757 621 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
622 }
623
624 if(is_signed)
28869757 625 put_rac(c, state+11 + el, v < 0); //11..21
791e7b83 626#else
115329f1 627
28869757 628 put_rac(c, state+0, 0);
791e7b83
MN
629 if(e<=9){
630 for(i=0; i<e; i++){
28869757 631 put_rac(c, state+1+i, 1); //1..10
791e7b83 632 }
28869757 633 put_rac(c, state+1+i, 0);
791e7b83
MN
634
635 for(i=e-1; i>=0; i--){
28869757 636 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
637 }
638
639 if(is_signed)
28869757 640 put_rac(c, state+11 + e, v < 0); //11..21
791e7b83
MN
641 }else{
642 for(i=0; i<e; i++){
28869757 643 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
791e7b83 644 }
28869757 645 put_rac(c, state+1+FFMIN(i,9), 0);
791e7b83
MN
646
647 for(i=e-1; i>=0; i--){
28869757 648 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
791e7b83
MN
649 }
650
651 if(is_signed)
28869757 652 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
791e7b83
MN
653 }
654#endif
655 }else{
28869757 656 put_rac(c, state+0, 1);
791e7b83
MN
657 }
658}
659
28869757
MN
660static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
661 if(get_rac(c, state+0))
791e7b83
MN
662 return 0;
663 else{
7c2425d2
LM
664 int i, e, a;
665 e= 0;
28869757 666 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
7c2425d2 667 e++;
791e7b83 668 }
7c2425d2 669
791e7b83 670 a= 1;
7c2425d2 671 for(i=e-1; i>=0; i--){
28869757 672 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
791e7b83
MN
673 }
674
28869757 675 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
791e7b83
MN
676 return -a;
677 else
678 return a;
679 }
680}
681
28869757 682static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
4f4e9633 683 int i;
0635cbfc 684 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
685
686 assert(v>=0);
0635cbfc
MN
687 assert(log2>=-4);
688
689 while(v >= r){
28869757 690 put_rac(c, state+4+log2, 1);
0635cbfc 691 v -= r;
4f4e9633 692 log2++;
0635cbfc 693 if(log2>0) r+=r;
4f4e9633 694 }
28869757 695 put_rac(c, state+4+log2, 0);
115329f1 696
4f4e9633 697 for(i=log2-1; i>=0; i--){
28869757 698 put_rac(c, state+31-i, (v>>i)&1);
4f4e9633 699 }
4f4e9633
MN
700}
701
28869757 702static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
4f4e9633 703 int i;
0635cbfc 704 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
705 int v=0;
706
0635cbfc
MN
707 assert(log2>=-4);
708
28869757 709 while(get_rac(c, state+4+log2)){
0635cbfc 710 v+= r;
4f4e9633 711 log2++;
0635cbfc 712 if(log2>0) r+=r;
4f4e9633 713 }
115329f1 714
4f4e9633 715 for(i=log2-1; i>=0; i--){
28869757 716 v+= get_rac(c, state+31-i)<<i;
4f4e9633
MN
717 }
718
719 return v;
720}
721
791e7b83
MN
722static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
723 const int mirror_left= !highpass;
724 const int mirror_right= (width&1) ^ highpass;
725 const int w= (width>>1) - 1 + (highpass & width);
726 int i;
727
728#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
729 if(mirror_left){
730 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
731 dst += dst_step;
732 src += src_step;
733 }
115329f1 734
791e7b83
MN
735 for(i=0; i<w; i++){
736 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
737 }
115329f1 738
791e7b83
MN
739 if(mirror_right){
740 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
741 }
742}
743
744static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
745 const int mirror_left= !highpass;
746 const int mirror_right= (width&1) ^ highpass;
747 const int w= (width>>1) - 1 + (highpass & width);
748 int i;
749
750 if(mirror_left){
751 int r= 3*2*ref[0];
752 r += r>>4;
753 r += r>>8;
754 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
755 dst += dst_step;
756 src += src_step;
757 }
115329f1 758
791e7b83
MN
759 for(i=0; i<w; i++){
760 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
761 r += r>>4;
762 r += r>>8;
763 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
764 }
115329f1 765
791e7b83
MN
766 if(mirror_right){
767 int r= 3*2*ref[w*ref_step];
768 r += r>>4;
769 r += r>>8;
770 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
771 }
772}
773
f5a71928
MN
774static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
775 const int mirror_left= !highpass;
776 const int mirror_right= (width&1) ^ highpass;
777 const int w= (width>>1) - 1 + (highpass & width);
778 int i;
779
780 assert(shift == 4);
781#define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
782 if(mirror_left){
783 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
784 dst += dst_step;
785 src += src_step;
786 }
115329f1 787
f5a71928
MN
788 for(i=0; i<w; i++){
789 dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
790 }
115329f1 791
f5a71928
MN
792 if(mirror_right){
793 dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
794 }
795}
796
791e7b83 797
/**
 * Generic in-place lifting step along one line.
 *
 * Every second sample starting at index start is updated with a rounded,
 * shifted weighted sum of n neighbours at odd distances:
 *   dst[x] +/-= (sum_i coeffs[i]*dst[x + 2*i - n + 1] + (1<<shift)/2) >> shift
 * Neighbour indices outside [0, width) are reflected about the first and
 * last sample.  The reflection is repeated until the index is in range, so
 * lines shorter than the filter no longer read outside dst.  A line of
 * fewer than two samples has no neighbour to predict from and is left
 * unchanged.
 *
 * @param dst     line to transform in place
 * @param width   number of samples in dst
 * @param coeffs  n filter taps (may be NULL when n is 0)
 * @param n       number of taps
 * @param shift   right shift applied to the rounded sum
 * @param start   index of the first sample to update (0 or 1)
 * @param inverse non-zero subtracts the prediction instead of adding it
 */
static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
    const int rounding= (1<<shift)/2;
    int x, i;

    if(width < 2)
        return;

    for(x=start; x<width; x+=2){
        int64_t sum=0;

        for(i=0; i<n; i++){
            int x2= x + 2*i - n + 1;

            /* whole-sample symmetric extension; terminates because width >= 2 */
            while(x2 < 0 || x2 >= width){
                if(x2 < 0) x2= -x2;
                else       x2= 2*width-x2-2;
            }
            sum += coeffs[i]*(int64_t)dst[x2];
        }
        if(inverse) dst[x] -= (sum + rounding)>>shift;
        else        dst[x] += (sum + rounding)>>shift;
    }
}
814
/**
 * Vertical counterpart of inplace_lift(): the same lifting step applied
 * down every column of a width x height block.
 *
 * Every second row starting at row start is updated from n rows at odd
 * vertical distances.  Row indices outside [0, height) are reflected
 * repeatedly until they are in range, so blocks with fewer rows than the
 * filter needs no longer read outside the buffer.  Blocks with fewer than
 * two rows are left unchanged.
 *
 * @param dst     top-left sample of the block
 * @param width   number of columns
 * @param height  number of rows
 * @param stride  distance in elements between vertically adjacent samples
 * @param coeffs  n filter taps (may be NULL when n is 0)
 * @param n       number of taps
 * @param shift   right shift applied to the rounded sum
 * @param start   index of the first row to update (0 or 1)
 * @param inverse non-zero subtracts the prediction instead of adding it
 */
static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
    const int rounding= (1<<shift)/2;
    int x, y, i;

    if(height < 2)
        return;

    for(y=start; y<height; y+=2){
        for(x=0; x<width; x++){
            int64_t sum=0;

            for(i=0; i<n; i++){
                int y2= y + 2*i - n + 1;

                /* whole-sample symmetric extension; terminates because height >= 2 */
                while(y2 < 0 || y2 >= height){
                    if(y2 < 0) y2= -y2;
                    else       y2= 2*height-y2-2;
                }
                sum += coeffs[i]*(int64_t)dst[x + y2*stride];
            }
            if(inverse) dst[x + y*stride] -= (sum + rounding)>>shift;
            else        dst[x + y*stride] += (sum + rounding)>>shift;
        }
    }
}
832
/*
 * Parameters of the generic lifting transform used by
 * horizontal/spatial_decomposeX() and horizontal/spatial_composeX().
 *
 * The transform consists of four lifting steps k = 1..4, each described by
 *   Nk      number of filter taps (0 disables the step)
 *   SHIFTk  right shift applied to the rounded weighted sum
 *   COEFFSk the taps themselves (NULL when Nk is 0)
 * LX0 / LX1 are the start indices passed to inplace_lift() for the even
 * numbered and odd numbered steps, and SCALEX is multiplied into every
 * sample before decomposition and divided out again after composition.
 *
 * Exactly one branch of the chain below is active (the first whose
 * condition is true).  Branches that override SCALEX, LX0 or LX1 drop the
 * default first so that enabling them does not redefine a macro with a
 * different value.
 */
#define SCALEX 1
#define LX0 0
#define LX1 1

#if 0 // more accurate 9/7
#define N1 2
#define SHIFT1 14
#define COEFFS1 (int[]){-25987,-25987}
#define N2 2
#define SHIFT2 19
#define COEFFS2 (int[]){-27777,-27777}
#define N3 2
#define SHIFT3 15
#define COEFFS3 (int[]){28931,28931}
#define N4 2
#define SHIFT4 15
#define COEFFS4 (int[]){14533,14533}
#elif 1 // 13/7 CRF
#define N1 4
#define SHIFT1 4
#define COEFFS1 (int[]){1,-9,-9,1}
#define N2 4
#define SHIFT2 4
#define COEFFS2 (int[]){-1,5,5,-1}
#define N3 0
#define SHIFT3 1
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 1
#define COEFFS4 NULL
#elif 1 // 3/5
#undef LX0
#undef LX1
#undef SCALEX
#define LX0 1
#define LX1 0
#define SCALEX 0.5
#define N1 2
#define SHIFT1 1
#define COEFFS1 (int[]){1,1}
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){-1,-1}
#define N3 0
#define SHIFT3 0
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 0
#define COEFFS4 NULL
#elif 1 // 11/5
#define N1 0
#define SHIFT1 1
#define COEFFS1 NULL
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){-1,-1}
#define N3 2
#define SHIFT3 0
#define COEFFS3 (int[]){-1,-1}
#define N4 4
#define SHIFT4 7
#define COEFFS4 (int[]){-5,29,29,-5}
#undef SCALEX
#define SCALEX 4
#elif 1 // 9/7 CDF
#define N1 2
#define SHIFT1 7
#define COEFFS1 (int[]){-203,-203}
#define N2 2
#define SHIFT2 12
#define COEFFS2 (int[]){-217,-217}
#define N3 2
#define SHIFT3 7
#define COEFFS3 (int[]){113,113}
#define N4 2
#define SHIFT4 9
#define COEFFS4 (int[]){227,227}
#undef SCALEX
#define SCALEX 1
#elif 1 // 7/5 CDF
#define N1 0
#define SHIFT1 1
#define COEFFS1 NULL
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){-1,-1}
#define N3 2
#define SHIFT3 0
#define COEFFS3 (int[]){-1,-1}
#define N4 2
#define SHIFT4 4
#define COEFFS4 (int[]){3,3}
#elif 1 // 9/7 MN
#define N1 4
#define SHIFT1 4
#define COEFFS1 (int[]){1,-9,-9,1}
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){1,1}
#define N3 0
#define SHIFT3 1
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 1
#define COEFFS4 NULL
#else // 13/7 CRF
#define N1 4
#define SHIFT1 4
#define COEFFS1 (int[]){1,-9,-9,1}
#define N2 4
#define SHIFT2 4
#define COEFFS2 (int[]){-1,5,5,-1}
#define N3 0
#define SHIFT3 1
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 1
#define COEFFS4 NULL
#endif
aa25a462
RFI
947static void horizontal_decomposeX(DWTELEM *b, int width){
948 DWTELEM temp[width];
791e7b83
MN
949 const int width2= width>>1;
950 const int w2= (width+1)>>1;
62ab0b78 951 int x;
791e7b83
MN
952
953 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
954 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
955 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
956 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
115329f1 957
791e7b83
MN
958 for(x=0; x<width2; x++){
959 temp[x ]= b[2*x ];
960 temp[x+w2]= b[2*x + 1];
961 }
962 if(width&1)
963 temp[x ]= b[2*x ];
964 memcpy(b, temp, width*sizeof(int));
965}
966
aa25a462
RFI
967static void horizontal_composeX(DWTELEM *b, int width){
968 DWTELEM temp[width];
791e7b83 969 const int width2= width>>1;
62ab0b78 970 int x;
791e7b83
MN
971 const int w2= (width+1)>>1;
972
973 memcpy(temp, b, width*sizeof(int));
974 for(x=0; x<width2; x++){
975 b[2*x ]= temp[x ];
976 b[2*x + 1]= temp[x+w2];
977 }
978 if(width&1)
979 b[2*x ]= temp[x ];
980
981 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
982 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
983 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
984 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
985}
986
aa25a462 987static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
791e7b83 988 int x, y;
115329f1 989
791e7b83
MN
990 for(y=0; y<height; y++){
991 for(x=0; x<width; x++){
992 buffer[y*stride + x] *= SCALEX;
993 }
994 }
995
996 for(y=0; y<height; y++){
997 horizontal_decomposeX(buffer + y*stride, width);
998 }
115329f1 999
791e7b83
MN
1000 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
1001 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
1002 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
115329f1 1003 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
791e7b83
MN
1004}
1005
aa25a462 1006static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
791e7b83 1007 int x, y;
115329f1 1008
791e7b83
MN
1009 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
1010 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
1011 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
1012 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
1013
1014 for(y=0; y<height; y++){
1015 horizontal_composeX(buffer + y*stride, width);
1016 }
1017
1018 for(y=0; y<height; y++){
1019 for(x=0; x<width; x++){
1020 buffer[y*stride + x] /= SCALEX;
1021 }
1022 }
1023}
1024
aa25a462
RFI
1025static void horizontal_decompose53i(DWTELEM *b, int width){
1026 DWTELEM temp[width];
791e7b83 1027 const int width2= width>>1;
62ab0b78 1028 int x;
791e7b83
MN
1029 const int w2= (width+1)>>1;
1030
1031 for(x=0; x<width2; x++){
1032 temp[x ]= b[2*x ];
1033 temp[x+w2]= b[2*x + 1];
1034 }
1035 if(width&1)
1036 temp[x ]= b[2*x ];
1037#if 0
62ab0b78
AJ
1038 {
1039 int A1,A2,A3,A4;
791e7b83
MN
1040 A2= temp[1 ];
1041 A4= temp[0 ];
1042 A1= temp[0+width2];
1043 A1 -= (A2 + A4)>>1;
1044 A4 += (A1 + 1)>>1;
1045 b[0+width2] = A1;
1046 b[0 ] = A4;
1047 for(x=1; x+1<width2; x+=2){
1048 A3= temp[x+width2];
1049 A4= temp[x+1 ];
1050 A3 -= (A2 + A4)>>1;
1051 A2 += (A1 + A3 + 2)>>2;
1052 b[x+width2] = A3;
1053 b[x ] = A2;
1054
1055 A1= temp[x+1+width2];
1056 A2= temp[x+2 ];
1057 A1 -= (A2 + A4)>>1;
1058 A4 += (A1 + A3 + 2)>>2;
1059 b[x+1+width2] = A1;
1060 b[x+1 ] = A4;
1061 }
1062 A3= temp[width-1];
1063 A3 -= A2;
1064 A2 += (A1 + A3 + 2)>>2;
1065 b[width -1] = A3;
1066 b[width2-1] = A2;
62ab0b78 1067 }
115329f1 1068#else
791e7b83
MN
1069 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1070 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
1071#endif
1072}
1073
aa25a462 1074static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1075 int i;
115329f1 1076
791e7b83
MN
1077 for(i=0; i<width; i++){
1078 b1[i] -= (b0[i] + b2[i])>>1;
1079 }
1080}
1081
aa25a462 1082static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1083 int i;
115329f1 1084
791e7b83
MN
1085 for(i=0; i<width; i++){
1086 b1[i] += (b0[i] + b2[i] + 2)>>2;
1087 }
1088}
1089
aa25a462 1090static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 1091 int y;
791e7b83
MN
1092 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1093 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
115329f1 1094
791e7b83
MN
1095 for(y=-2; y<height; y+=2){
1096 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1097 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1098
1099{START_TIMER
13705b69
MN
1100 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
1101 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
791e7b83 1102STOP_TIMER("horizontal_decompose53i")}
115329f1 1103
791e7b83 1104{START_TIMER
13705b69
MN
1105 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
1106 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
791e7b83 1107STOP_TIMER("vertical_decompose53i*")}
115329f1 1108
791e7b83
MN
1109 b0=b2;
1110 b1=b3;
1111 }
1112}
1113
/*
 * Constants of the four lifting steps A..D used by the *97i functions.
 * Each step evaluates (W_xM*(a + b) + W_xO) >> W_xS, i.e. M is the
 * multiplier, O the rounding offset and S the shift.
 *
 * liftS and lift5 are aliased to the plain lift() by default.  The active
 * parameter set removes the liftS alias again, which makes the code below
 * use the dedicated liftS() and the "#else" branches of "#ifdef liftS".
 */
#define liftS lift
#define lift5 lift

#if 1
#undef liftS

#define W_AM 3
#define W_AO 0
#define W_AS 1

#define W_BM 1
#define W_BO 8
#define W_BS 4

#define W_CM 1
#define W_CO 0
#define W_CS 0

#define W_DM 3
#define W_DO 4
#define W_DS 3
#elif 0
#define W_AM 55
#define W_AO 16
#define W_AS 5

#define W_BM 3
#define W_BO 32
#define W_BS 6

#define W_CM 127
#define W_CO 64
#define W_CS 7

#define W_DM 7
#define W_DO 8
#define W_DS 4
#elif 0
#define W_AM 97
#define W_AO 32
#define W_AS 6

#define W_BM 63
#define W_BO 512
#define W_BS 10

#define W_CM 13
#define W_CO 8
#define W_CS 4

#define W_DM 15
#define W_DO 16
#define W_DS 5

#else

#define W_AM 203
#define W_AO 64
#define W_AS 7

#define W_BM 217
#define W_BO 2048
#define W_BS 12

#define W_CM 113
#define W_CO 64
#define W_CS 7

#define W_DM 227
#define W_DO 128
#define W_DS 9
#endif
aa25a462
RFI
1184static void horizontal_decompose97i(DWTELEM *b, int width){
1185 DWTELEM temp[width];
791e7b83
MN
1186 const int w2= (width+1)>>1;
1187
1188 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
f5a71928 1189 liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
791e7b83
MN
1190 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1191 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
1192}
1193
1194
aa25a462 1195static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1196 int i;
115329f1 1197
791e7b83
MN
1198 for(i=0; i<width; i++){
1199 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1200 }
1201}
1202
aa25a462 1203static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1204 int i;
115329f1 1205
791e7b83
MN
1206 for(i=0; i<width; i++){
1207#ifdef lift5
1208 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1209#else
1210 int r= 3*(b0[i] + b2[i]);
1211 r+= r>>4;
1212 r+= r>>8;
1213 b1[i] += (r+W_CO)>>W_CS;
1214#endif
1215 }
1216}
1217
aa25a462 1218static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1219 int i;
115329f1 1220
791e7b83 1221 for(i=0; i<width; i++){
f5a71928 1222#ifdef liftS
791e7b83 1223 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
f5a71928
MN
1224#else
1225 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
1226#endif
791e7b83
MN
1227 }
1228}
1229
aa25a462 1230static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1231 int i;
115329f1 1232
791e7b83
MN
1233 for(i=0; i<width; i++){
1234 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1235 }
1236}
1237
aa25a462 1238static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 1239 int y;
791e7b83
MN
1240 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1241 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1242 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1243 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
115329f1 1244
791e7b83
MN
1245 for(y=-4; y<height; y+=2){
1246 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1247 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1248
1249{START_TIMER
13705b69
MN
1250 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1251 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
791e7b83
MN
1252if(width>400){
1253STOP_TIMER("horizontal_decompose97i")
1254}}
115329f1 1255
791e7b83 1256{START_TIMER
13705b69
MN
1257 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1258 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1259 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1260 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
791e7b83
MN
1261
1262if(width>400){
1263STOP_TIMER("vertical_decompose97i")
1264}}
115329f1 1265
791e7b83
MN
1266 b0=b2;
1267 b1=b3;
1268 b2=b4;
1269 b3=b5;
1270 }
1271}
1272
aa25a462 1273void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
791e7b83 1274 int level;
115329f1 1275
46c281e8
MN
1276 for(level=0; level<decomposition_count; level++){
1277 switch(type){
791e7b83
MN
1278 case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1279 case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1280 case 2: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
1281 }
1282 }
1283}
1284
aa25a462
RFI
1285static void horizontal_compose53i(DWTELEM *b, int width){
1286 DWTELEM temp[width];
791e7b83
MN
1287 const int width2= width>>1;
1288 const int w2= (width+1)>>1;
62ab0b78 1289 int x;
791e7b83
MN
1290
1291#if 0
62ab0b78 1292 int A1,A2,A3,A4;
791e7b83
MN
1293 A2= temp[1 ];
1294 A4= temp[0 ];
1295 A1= temp[0+width2];
1296 A1 -= (A2 + A4)>>1;
1297 A4 += (A1 + 1)>>1;
1298 b[0+width2] = A1;
1299 b[0 ] = A4;
1300 for(x=1; x+1<width2; x+=2){
1301 A3= temp[x+width2];
1302 A4= temp[x+1 ];
1303 A3 -= (A2 + A4)>>1;
1304 A2 += (A1 + A3 + 2)>>2;
1305 b[x+width2] = A3;
1306 b[x ] = A2;
1307
1308 A1= temp[x+1+width2];
1309 A2= temp[x+2 ];
1310 A1 -= (A2 + A4)>>1;
1311 A4 += (A1 + A3 + 2)>>2;
1312 b[x+1+width2] = A1;
1313 b[x+1 ] = A4;
1314 }
1315 A3= temp[width-1];
1316 A3 -= A2;
1317 A2 += (A1 + A3 + 2)>>2;
1318 b[width -1] = A3;
1319 b[width2-1] = A2;
115329f1 1320#else
791e7b83
MN
1321 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1322 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1323#endif
1324 for(x=0; x<width2; x++){
1325 b[2*x ]= temp[x ];
1326 b[2*x + 1]= temp[x+w2];
1327 }
1328 if(width&1)
1329 b[2*x ]= temp[x ];
1330}
1331
aa25a462 1332static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1333 int i;
115329f1 1334
791e7b83
MN
1335 for(i=0; i<width; i++){
1336 b1[i] += (b0[i] + b2[i])>>1;
1337 }
1338}
1339
aa25a462 1340static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1341 int i;
115329f1 1342
791e7b83
MN
1343 for(i=0; i<width; i++){
1344 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1345 }
1346}
1347
a0d1931c
Y
1348static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1349 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1350 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1351 cs->y = -1;
1352}
1353
f9e6ebf7
LM
1354static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1355 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1356 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1357 cs->y = -1;
1358}
1359
a0d1931c
Y
1360static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1361 int y= cs->y;
115329f1 1362
a0d1931c
Y
1363 DWTELEM *b0= cs->b0;
1364 DWTELEM *b1= cs->b1;
3b6ab26c
MN
1365 DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1366 DWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
a0d1931c
Y
1367
1368{START_TIMER
13705b69
MN
1369 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1370 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
a0d1931c
Y
1371STOP_TIMER("vertical_compose53i*")}
1372
1373{START_TIMER
13705b69
MN
1374 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1375 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
a0d1931c
Y
1376STOP_TIMER("horizontal_compose53i")}
1377
1378 cs->b0 = b2;
1379 cs->b1 = b3;
1380 cs->y += 2;
1381}
1382
f9e6ebf7
LM
1383static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1384 int y= cs->y;
1385 DWTELEM *b0= cs->b0;
1386 DWTELEM *b1= cs->b1;
1387 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1388 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
791e7b83
MN
1389
1390{START_TIMER
13705b69
MN
1391 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1392 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
791e7b83
MN
1393STOP_TIMER("vertical_compose53i*")}
1394
1395{START_TIMER
13705b69
MN
1396 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1397 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
791e7b83
MN
1398STOP_TIMER("horizontal_compose53i")}
1399
f9e6ebf7
LM
1400 cs->b0 = b2;
1401 cs->b1 = b3;
1402 cs->y += 2;
1403}
1404
1405static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1406 dwt_compose_t cs;
1407 spatial_compose53i_init(&cs, buffer, height, stride);
1408 while(cs.y <= height)
1409 spatial_compose53i_dy(&cs, buffer, width, height, stride);
115329f1
DB
1410}
1411
791e7b83 1412
aa25a462
RFI
1413static void horizontal_compose97i(DWTELEM *b, int width){
1414 DWTELEM temp[width];
791e7b83
MN
1415 const int w2= (width+1)>>1;
1416
1417 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1418 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
f5a71928 1419 liftS(b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
791e7b83
MN
1420 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1421}
1422
aa25a462 1423static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1424 int i;
115329f1 1425
791e7b83
MN
1426 for(i=0; i<width; i++){
1427 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1428 }
1429}
1430
aa25a462 1431static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1432 int i;
115329f1 1433
791e7b83
MN
1434 for(i=0; i<width; i++){
1435#ifdef lift5
1436 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1437#else
1438 int r= 3*(b0[i] + b2[i]);
1439 r+= r>>4;
1440 r+= r>>8;
1441 b1[i] -= (r+W_CO)>>W_CS;
1442#endif
1443 }
1444}
1445
aa25a462 1446static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1447 int i;
115329f1 1448
791e7b83 1449 for(i=0; i<width; i++){
f5a71928 1450#ifdef liftS
791e7b83 1451 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
f5a71928
MN
1452#else
1453 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1454#endif
791e7b83
MN
1455 }
1456}
1457
aa25a462 1458static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1459 int i;
115329f1 1460
791e7b83
MN
1461 for(i=0; i<width; i++){
1462 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1463 }
1464}
1465
565a45ac
MN
1466static void vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1467 int i;
115329f1 1468
565a45ac 1469 for(i=0; i<width; i++){
62ab0b78 1470#ifndef lift5
565a45ac 1471 int r;
62ab0b78 1472#endif
565a45ac
MN
1473 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1474#ifdef lift5
1475 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1476#else
1477 r= 3*(b2[i] + b4[i]);
1478 r+= r>>4;
1479 r+= r>>8;
1480 b3[i] -= (r+W_CO)>>W_CS;
1481#endif
f5a71928 1482#ifdef liftS
565a45ac 1483 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
f5a71928
MN
1484#else
1485 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1486#endif
565a45ac
MN
1487 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1488 }
1489}
1490
a0d1931c
Y
1491static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1492 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1493 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1494 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1495 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1496 cs->y = -3;
1497}
1498
f9e6ebf7
LM
1499static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1500 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1501 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1502 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1503 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1504 cs->y = -3;
1505}
791e7b83 1506
a0d1931c
Y
1507static void spatial_compose97i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1508 int y = cs->y;
115329f1 1509
a0d1931c
Y
1510 DWTELEM *b0= cs->b0;
1511 DWTELEM *b1= cs->b1;
1512 DWTELEM *b2= cs->b2;
1513 DWTELEM *b3= cs->b3;
3b6ab26c
MN
1514 DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1515 DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
115329f1 1516
a0d1931c 1517{START_TIMER
565a45ac
MN
1518 if(y>0 && y+4<height){
1519 vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1520 }else{
13705b69
MN
1521 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1522 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1523 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1524 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
565a45ac 1525 }
a0d1931c
Y
1526if(width>400){
1527STOP_TIMER("vertical_compose97i")}}
a0d1931c
Y
1528
1529{START_TIMER
13705b69
MN
1530 if(y-1<(unsigned)height) horizontal_compose97i(b0, width);
1531 if(y+0<(unsigned)height) horizontal_compose97i(b1, width);
3b6ab26c 1532if(width>400 && y+0<(unsigned)height){
a0d1931c
Y
1533STOP_TIMER("horizontal_compose97i")}}
1534
1535 cs->b0=b2;
1536 cs->b1=b3;
1537 cs->b2=b4;
1538 cs->b3=b5;
1539 cs->y += 2;
1540}
1541
f9e6ebf7
LM
1542static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1543 int y = cs->y;
1544 DWTELEM *b0= cs->b0;
1545 DWTELEM *b1= cs->b1;
1546 DWTELEM *b2= cs->b2;
1547 DWTELEM *b3= cs->b3;
1548 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1549 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
791e7b83 1550
791e7b83 1551{START_TIMER
13705b69
MN
1552 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1553 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1554 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1555 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
791e7b83
MN
1556if(width>400){
1557STOP_TIMER("vertical_compose97i")}}
1558
1559{START_TIMER
13705b69
MN
1560 if(y-1<(unsigned)height) horizontal_compose97i(b0, width);
1561 if(y+0<(unsigned)height) horizontal_compose97i(b1, width);
791e7b83
MN
1562if(width>400 && b0 <= b2){
1563STOP_TIMER("horizontal_compose97i")}}
f9e6ebf7
LM
1564
1565 cs->b0=b2;
1566 cs->b1=b3;
1567 cs->b2=b4;
1568 cs->b3=b5;
1569 cs->y += 2;
1570}
1571
1572static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1573 dwt_compose_t cs;
1574 spatial_compose97i_init(&cs, buffer, height, stride);
1575 while(cs.y <= height)
1576 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1577}
1578
a0d1931c
Y
1579void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1580 int level;
1581 for(level=decomposition_count-1; level>=0; level--){
1582 switch(type){
1583 case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1584 case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1585 /* not slicified yet */
1586 case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1587 av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
1588 }
1589 }
1590}
1591
f9e6ebf7
LM
1592void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1593 int level;
1594 for(level=decomposition_count-1; level>=0; level--){
1595 switch(type){
1596 case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1597 case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1598 /* not slicified yet */
1599 case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
1600 }
791e7b83
MN
1601 }
1602}
1603
f9e6ebf7
LM
1604void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1605 const int support = type==1 ? 3 : 5;
791e7b83 1606 int level;
f9e6ebf7 1607 if(type==2) return;
791e7b83 1608
46c281e8 1609 for(level=decomposition_count-1; level>=0; level--){
f9e6ebf7
LM
1610 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1611 switch(type){
1612 case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1613 break;
1614 case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1615 break;
1616 case 2: break;
1617 }
791e7b83
MN
1618 }
1619 }
1620}
1621
a0d1931c
Y
1622void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1623 const int support = type==1 ? 3 : 5;
1624 int level;
1625 if(type==2) return;
1626
1627 for(level=decomposition_count-1; level>=0; level--){
1628 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1629 switch(type){
1630 case 0: spatial_compose97i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1631 break;
1632 case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1633 break;
1634 case 2: break;
1635 }
1636 }
1637 }
1638}
1639
f9e6ebf7
LM
1640void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1641 if(type==2){
1642 int level;
1643 for(level=decomposition_count-1; level>=0; level--)
1644 spatial_composeX (buffer, width>>level, height>>level, stride<<level);
1645 }else{
1646 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1647 int y;
1648 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1649 for(y=0; y<height; y+=4)
1650 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1651 }
1652}
1653
0ecca7a4 1654static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
4f4e9633
MN
1655 const int w= b->width;
1656 const int h= b->height;
1657 int x, y;
1658
791e7b83 1659 if(1){
791e7b83 1660 int run=0;
a8d73e56 1661 int runs[w*h];
791e7b83 1662 int run_index=0;
b44985ba 1663 int max_index;
115329f1 1664
791e7b83
MN
1665 for(y=0; y<h; y++){
1666 for(x=0; x<w; x++){
78486403 1667 int v, p=0;
6b2f6646 1668 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1669 v= src[x + y*stride];
791e7b83
MN
1670
1671 if(y){
a8d73e56 1672 t= src[x + (y-1)*stride];
791e7b83 1673 if(x){
a8d73e56 1674 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1675 }
1676 if(x + 1 < w){
a8d73e56 1677 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1678 }
1679 }
1680 if(x){
a8d73e56 1681 l= src[x - 1 + y*stride];
6b2f6646
MN
1682 /*if(x > 1){
1683 if(orientation==1) ll= src[y + (x-2)*stride];
1684 else ll= src[x - 2 + y*stride];
791e7b83
MN
1685 }*/
1686 }
78486403 1687 if(parent){
a8d73e56
MN
1688 int px= x>>1;
1689 int py= y>>1;
115329f1 1690 if(px<b->parent->width && py<b->parent->height)
78486403
MN
1691 p= parent[px + py*2*stride];
1692 }
1693 if(!(/*ll|*/l|lt|t|rt|p)){
791e7b83
MN
1694 if(v){
1695 runs[run_index++]= run;
1696 run=0;
1697 }else{
1698 run++;
1699 }
1700 }
1701 }
1702 }
b44985ba 1703 max_index= run_index;
791e7b83
MN
1704 runs[run_index++]= run;
1705 run_index=0;
1706 run= runs[run_index++];
1707
b44985ba
MN
1708 put_symbol2(&s->c, b->state[30], max_index, 0);
1709 if(run_index <= max_index)
1710 put_symbol2(&s->c, b->state[1], run, 3);
115329f1 1711
791e7b83 1712 for(y=0; y<h; y++){
d06c75a8 1713 if(s->c.bytestream_end - s->c.bytestream < w*40){
0ecca7a4
MN
1714 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1715 return -1;
1716 }
791e7b83 1717 for(x=0; x<w; x++){
78486403 1718 int v, p=0;
6b2f6646 1719 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1720 v= src[x + y*stride];
791e7b83
MN
1721
1722 if(y){
a8d73e56 1723 t= src[x + (y-1)*stride];
791e7b83 1724 if(x){
a8d73e56 1725 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1726 }
1727 if(x + 1 < w){
a8d73e56 1728 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1729 }
1730 }
1731 if(x){
a8d73e56 1732 l= src[x - 1 + y*stride];
6b2f6646
MN
1733 /*if(x > 1){
1734 if(orientation==1) ll= src[y + (x-2)*stride];
1735 else ll= src[x - 2 + y*stride];
791e7b83
MN
1736 }*/
1737 }
78486403 1738 if(parent){
a8d73e56
MN
1739 int px= x>>1;
1740 int py= y>>1;
115329f1 1741 if(px<b->parent->width && py<b->parent->height)
78486403
MN
1742 p= parent[px + py*2*stride];
1743 }
1744 if(/*ll|*/l|lt|t|rt|p){
1745 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
6b2f6646 1746
28869757 1747 put_rac(&s->c, &b->state[0][context], !!v);
791e7b83
MN
1748 }else{
1749 if(!run){
1750 run= runs[run_index++];
4f4e9633 1751
b44985ba
MN
1752 if(run_index <= max_index)
1753 put_symbol2(&s->c, b->state[1], run, 3);
791e7b83
MN
1754 assert(v);
1755 }else{
1756 run--;
1757 assert(!v);
1758 }
1759 }
1760 if(v){
78486403 1761 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
538a3841
MN
1762 int l2= 2*ABS(l) + (l<0);
1763 int t2= 2*ABS(t) + (t<0);
6b2f6646 1764
0635cbfc 1765 put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
538a3841 1766 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
791e7b83
MN
1767 }
1768 }
1769 }
791e7b83 1770 }
0ecca7a4 1771 return 0;
791e7b83
MN
1772}
1773
115329f1 1774static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
4f4e9633
MN
1775// encode_subband_qtree(s, b, src, parent, stride, orientation);
1776// encode_subband_z0run(s, b, src, parent, stride, orientation);
0ecca7a4 1777 return encode_subband_c0run(s, b, src, parent, stride, orientation);
4f4e9633
MN
1778// encode_subband_dzr(s, b, src, parent, stride, orientation);
1779}
1780
a0d1931c 1781static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
791e7b83
MN
1782 const int w= b->width;
1783 const int h= b->height;
1784 int x,y;
115329f1 1785
791e7b83 1786 if(1){
b44985ba 1787 int run, runs;
cbb1d2b1
MN
1788 x_and_coeff *xc= b->x_coeff;
1789 x_and_coeff *prev_xc= NULL;
1790 x_and_coeff *prev2_xc= xc;
1791 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1792 x_and_coeff *prev_parent_xc= parent_xc;
791e7b83 1793
b44985ba
MN
1794 runs= get_symbol2(&s->c, b->state[30], 0);
1795 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1796 else run= INT_MAX;
1797
791e7b83 1798 for(y=0; y<h; y++){
0cea8a03
MN
1799 int v=0;
1800 int lt=0, t=0, rt=0;
1801
cbb1d2b1
MN
1802 if(y && prev_xc->x == 0){
1803 rt= prev_xc->coeff;
0cea8a03 1804 }
791e7b83 1805 for(x=0; x<w; x++){
0cea8a03
MN
1806 int p=0;
1807 const int l= v;
115329f1 1808
0cea8a03 1809 lt= t; t= rt;
791e7b83 1810
ff765159 1811 if(y){
cbb1d2b1
MN
1812 if(prev_xc->x <= x)
1813 prev_xc++;
1814 if(prev_xc->x == x + 1)
1815 rt= prev_xc->coeff;
ff765159
MN
1816 else
1817 rt=0;
1818 }
cbb1d2b1
MN
1819 if(parent_xc){
1820 if(x>>1 > parent_xc->x){
1821 parent_xc++;
7b49c309 1822 }
cbb1d2b1
MN
1823 if(x>>1 == parent_xc->x){
1824 p= parent_xc->coeff;
ff765159 1825 }
78486403
MN
1826 }
1827 if(/*ll|*/l|lt|t|rt|p){
538a3841 1828 int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
6b2f6646 1829
28869757 1830 v=get_rac(&s->c, &b->state[0][context]);
3c096ac7
MN
1831 if(v){
1832 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1833 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
115329f1 1834
cbb1d2b1
MN
1835 xc->x=x;
1836 (xc++)->coeff= v;
3c096ac7 1837 }
791e7b83
MN
1838 }else{
1839 if(!run){
b44985ba
MN
1840 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1841 else run= INT_MAX;
3c096ac7
MN
1842 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1843 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
115329f1 1844
cbb1d2b1
MN
1845 xc->x=x;
1846 (xc++)->coeff= v;
791e7b83 1847 }else{
99cd59e5 1848 int max_run;
791e7b83
MN
1849 run--;
1850 v=0;
3c1adccd 1851
cbb1d2b1 1852 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
99cd59e5 1853 else max_run= FFMIN(run, w-x-1);
cbb1d2b1
MN
1854 if(parent_xc)
1855 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
99cd59e5
MN
1856 x+= max_run;
1857 run-= max_run;
791e7b83
MN
1858 }
1859 }
7b49c309 1860 }
cbb1d2b1
MN
1861 (xc++)->x= w+1; //end marker
1862 prev_xc= prev2_xc;
1863 prev2_xc= xc;
115329f1 1864
cbb1d2b1 1865 if(parent_xc){
7b49c309 1866 if(y&1){
cbb1d2b1
MN
1867 while(parent_xc->x != parent->width+1)
1868 parent_xc++;
1869 parent_xc++;
1870 prev_parent_xc= parent_xc;
7b49c309 1871 }else{
cbb1d2b1 1872 parent_xc= prev_parent_xc;
791e7b83
MN
1873 }
1874 }
1875 }
a0d1931c 1876
cbb1d2b1 1877 (xc++)->x= w+1; //end marker
a0d1931c
Y
1878 }
1879}
1880
1881static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1882 const int w= b->width;
62ab0b78 1883 int y;
c97de57c
MN
1884 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
1885 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
a0d1931c
Y
1886 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1887 int new_index = 0;
115329f1 1888
a0d1931c
Y
1889 START_TIMER
1890
1891 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1892 qadd= 0;
1893 qmul= 1<<QEXPSHIFT;
1894 }
1895
1896 /* If we are on the second or later slice, restore our index. */
1897 if (start_y != 0)
1898 new_index = save_state[0];
1899
115329f1 1900
a0d1931c
Y
1901 for(y=start_y; y<h; y++){
1902 int x = 0;
1903 int v;
1904 DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1905 memset(line, 0, b->width*sizeof(DWTELEM));
1906 v = b->x_coeff[new_index].coeff;
1907 x = b->x_coeff[new_index++].x;
1908 while(x < w)
1909 {
538a3841
MN
1910 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1911 register int u= -(v&1);
1912 line[x] = (t^u) - u;
1913
a0d1931c
Y
1914 v = b->x_coeff[new_index].coeff;
1915 x = b->x_coeff[new_index++].x;
1916 }
791e7b83 1917 }
a0d1931c
Y
1918 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1919 STOP_TIMER("decode_subband")
1920 }
115329f1 1921
a0d1931c
Y
1922 /* Save our variables for the next slice. */
1923 save_state[0] = new_index;
115329f1 1924
a0d1931c 1925 return;
791e7b83
MN
1926}
1927
1928static void reset_contexts(SnowContext *s){
1929 int plane_index, level, orientation;
1930
19aa028d 1931 for(plane_index=0; plane_index<3; plane_index++){
791e7b83
MN
1932 for(level=0; level<s->spatial_decomposition_count; level++){
1933 for(orientation=level ? 1:0; orientation<4; orientation++){
28869757 1934 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
791e7b83
MN
1935 }
1936 }
1937 }
28869757
MN
1938 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1939 memset(s->block_state, MID_STATE, sizeof(s->block_state));
155ec6ed
MN
1940}
1941
1942static int alloc_blocks(SnowContext *s){
1943 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1944 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
115329f1 1945
155ec6ed
MN
1946 s->b_width = w;
1947 s->b_height= h;
115329f1 1948
155ec6ed
MN
1949 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1950 return 0;
1951}
1952
28869757
MN
1953static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1954 uint8_t *bytestream= d->bytestream;
1955 uint8_t *bytestream_start= d->bytestream_start;
155ec6ed 1956 *d= *s;
28869757
MN
1957 d->bytestream= bytestream;
1958 d->bytestream_start= bytestream_start;
155ec6ed
MN
1959}
1960
1961//near copy & paste from dsputil, FIXME
/**
 * Sums the pixels of a w x w square.
 * @param pix       top-left pixel of the square
 * @param line_size distance in bytes between two rows
 * @param w         width and height of the square
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
{
    int total= 0;
    int row, col;

    for (row = 0; row < w; row++) {
        const uint8_t *line= pix + row*line_size;

        for (col = 0; col < w; col++)
            total += line[col];
    }
    return total;
}
1976
1977//near copy & paste from dsputil, FIXME
1978static int pix_norm1(uint8_t * pix, int line_size, int w)
1979{
1980 int s, i, j;
1981 uint32_t *sq = squareTbl + 256;
1982
1983 s = 0;
1984 for (i = 0; i < w; i++) {
1985 for (j = 0; j < w; j ++) {
1986 s += sq[pix[0]];
1987 pix ++;
1988 }
1989 pix += line_size - w;
1990 }
1991 return s;
1992}
1993
1994static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1995 const int w= s->b_width << s->block_max_depth;
1996 const int rem_depth= s->block_max_depth - level;
1997 const int index= (x + y*w) << rem_depth;
1998 const int block_w= 1<<rem_depth;
1999 BlockNode block;
2000 int i,j;
115329f1 2001
155ec6ed
MN
2002 block.color[0]= l;
2003 block.color[1]= cb;
2004 block.color[2]= cr;
2005 block.mx= mx;
2006 block.my= my;
2007 block.type= type;
2008 block.level= level;
2009
2010 for(j=0; j<block_w; j++){
2011 for(i=0; i<block_w; i++){
2012 s->block[index + i + j*w]= block;
2013 }
2014 }
2015}
2016
2017static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
2018 const int offset[3]= {
2019 y*c-> stride + x,
2020 ((y*c->uvstride + x)>>1),
2021 ((y*c->uvstride + x)>>1),
2022 };
2023 int i;
2024 for(i=0; i<3; i++){
2025 c->src[0][i]= src [i];
2026 c->ref[0][i]= ref [i] + offset[i];
2027 }
2028 assert(!ref_index);
2029}
2030
2031//FIXME copy&paste
2032#define P_LEFT P[1]
2033#define P_TOP P[2]
2034#define P_TOPRIGHT P[3]
2035#define P_MEDIAN P[4]
2036#define P_MV1 P[9]
2037#define FLAG_QPEL 1 //must be 1
2038
2039static int encode_q_branch(SnowContext *s, int level, int x, int y){
2040 uint8_t p_buffer[1024];
2041 uint8_t i_buffer[1024];
2042 uint8_t p_state[sizeof(s->block_state)];
2043 uint8_t i_state[sizeof(s->block_state)];
28869757
MN
2044 RangeCoder pc, ic;
2045 uint8_t *pbbak= s->c.bytestream;
2046 uint8_t *pbbak_start= s->c.bytestream_start;
155ec6ed
MN
2047 int score, score2, iscore, i_len, p_len, block_s, sum;
2048 const int w= s->b_width << s->block_max_depth;
2049 const int h= s->b_height << s->block_max_depth;
2050 const int rem_depth= s->block_max_depth - level;
2051 const int index= (x + y*w) << rem_depth;
2052 const int block_w= 1<<(LOG2_MB_SIZE - level);
155ec6ed
MN
2053 int trx= (x+1)<<rem_depth;
2054 int try= (y+1)<<rem_depth;
2055 BlockNode *left = x ? &s->block[index-1] : &null_block;
2056 BlockNode *top = y ? &s->block[index-w] : &null_block;
2057 BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2058 BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2059 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2060 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2061 int pl = left->color[0];
2062 int pcb= left->color[1];
2063 int pcr= left->color[2];
2064 int pmx= mid_pred(left->mx, top->mx, tr->mx);
2065 int pmy= mid_pred(left->my, top->my, tr->my);
2066 int mx=0, my=0;
51d6a3cf 2067 int l,cr,cb;
155ec6ed
MN
2068 const int stride= s->current_picture.linesize[0];
2069 const int uvstride= s->current_picture.linesize[1];
51d6a3cf
MN
2070 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
2071 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2072 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
155ec6ed
MN
2073 int P[10][2];
2074 int16_t last_mv[3][2];
2075 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2076 const int shift= 1+qpel;
2077 MotionEstContext *c= &s->m.me;
2078 int mx_context= av_log2(2*ABS(left->mx - top->mx));
2079 int my_context= av_log2(2*ABS(left->my - top->my));
2080 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2081
2082 assert(sizeof(s->block_state) >= 256);
2083 if(s->keyframe){
2084 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2085 return 0;
2086 }
2087
155ec6ed
MN
2088// clip predictors / edge ?
2089
2090 P_LEFT[0]= left->mx;
2091 P_LEFT[1]= left->my;
2092 P_TOP [0]= top->mx;
2093 P_TOP [1]= top->my;
2094 P_TOPRIGHT[0]= tr->mx;
2095 P_TOPRIGHT[1]= tr->my;
115329f1 2096
155ec6ed
MN
2097 last_mv[0][0]= s->block[index].mx;
2098 last_mv[0][1]= s->block[index].my;
2099 last_mv[1][0]= right->mx;
2100 last_mv[1][1]= right->my;
2101 last_mv[2][0]= bottom->mx;
2102 last_mv[2][1]= bottom->my;
115329f1 2103
155ec6ed 2104 s->m.mb_stride=2;
115329f1 2105 s->m.mb_x=
155ec6ed
MN
2106 s->m.mb_y= 0;
2107 s->m.me.skip= 0;
2108
2109 init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
115329f1 2110
155ec6ed
MN
2111 assert(s->m.me. stride == stride);
2112 assert(s->m.me.uvstride == uvstride);
115329f1 2113
155ec6ed
MN
2114 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2115 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2116 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2117 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
115329f1 2118
ff158dc9
MN
2119 c->xmin = - x*block_w - 16+2;
2120 c->ymin = - y*block_w - 16+2;
2121 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2122 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
155ec6ed
MN
2123
2124 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
115329f1 2125 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
155ec6ed
MN
2126 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2127 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2128 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2129 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2130 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2131
2132 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2133 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2134
2135 if (!y) {
2136 c->pred_x= P_LEFT[0];
2137 c->pred_y= P_LEFT[1];
2138 } else {
2139 c->pred_x = P_MEDIAN[0];
2140 c->pred_y = P_MEDIAN[1];
2141 }
2142
115329f1 2143 score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv,
155ec6ed
MN
2144 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2145
2146 assert(mx >= c->xmin);
2147 assert(mx <= c->xmax);
2148 assert(my >= c->ymin);
2149 assert(my <= c->ymax);
115329f1 2150
155ec6ed
MN
2151 score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2152 score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2153 //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
115329f1 2154
155ec6ed
MN
2155 // subpel search
2156 pc= s->c;
28869757
MN
2157 pc.bytestream_start=
2158 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
155ec6ed
MN
2159 memcpy(p_state, s->block_state, sizeof(s->block_state));
2160
2161 if(level!=s->block_max_depth)
28869757
MN
2162 put_rac(&pc, &p_state[4 + s_context], 1);
2163 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
155ec6ed
MN
2164 put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
2165 put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
28869757
MN
2166 p_len= pc.bytestream - pc.bytestream_start;
2167 score += (s->lambda2*(p_len*8
2168 + (pc.outstanding_count - s->c.outstanding_count)*8
2169 + (-av_log2(pc.range) + av_log2(s->c.range))
2170 ))>>FF_LAMBDA_SHIFT;
155ec6ed
MN
2171
2172 block_s= block_w*block_w;
51d6a3cf 2173 sum = pix_sum(current_data[0], stride, block_w);
155ec6ed 2174 l= (sum + block_s/2)/block_s;
51d6a3cf 2175 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
115329f1 2176
155ec6ed 2177 block_s= block_w*block_w>>2;
51d6a3cf 2178 sum = pix_sum(current_data[1], uvstride, block_w>>1);
155ec6ed
MN
2179 cb= (sum + block_s/2)/block_s;
2180// iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
51d6a3cf 2181 sum = pix_sum(current_data[2], uvstride, block_w>>1);
155ec6ed
MN
2182 cr= (sum + block_s/2)/block_s;
2183// iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2184
2185 ic= s->c;
28869757
MN
2186 ic.bytestream_start=
2187 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
155ec6ed
MN
2188 memcpy(i_state, s->block_state, sizeof(s->block_state));
2189 if(level!=s->block_max_depth)
28869757
MN
2190 put_rac(&ic, &i_state[4 + s_context], 1);
2191 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
155ec6ed
MN
2192 put_symbol(&ic, &i_state[32], l-pl , 1);
2193 put_symbol(&ic, &i_state[64], cb-pcb, 1);
2194 put_symbol(&ic, &i_state[96], cr-pcr, 1);
28869757
MN
2195 i_len= ic.bytestream - ic.bytestream_start;
2196 iscore += (s->lambda2*(i_len*8
2197 + (ic.outstanding_count - s->c.outstanding_count)*8
2198 + (-av_log2(ic.range) + av_log2(s->c.range))
2199 ))>>FF_LAMBDA_SHIFT;
155ec6ed
MN
2200
2201// assert(score==256*256*256*64-1);
2202 assert(iscore < 255*255*256 + s->lambda2*10);
2203 assert(iscore >= 0);
2204 assert(l>=0 && l<=255);
2205 assert(pl>=0 && pl<=255);
2206
2207 if(level==0){
2208 int varc= iscore >> 8;
2209 int vard= score >> 8;
2210 if (vard <= 64 || vard < varc)
2211 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2212 else
2213 c->scene_change_score+= s->m.qscale;
2214 }
115329f1 2215
155ec6ed 2216 if(level!=s->block_max_depth){
28869757 2217 put_rac(&s->c, &s->block_state[4 + s_context], 0);
155ec6ed
MN
2218 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2219 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2220 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2221 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2222 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
115329f1 2223
155ec6ed
MN
2224 if(score2 < score && score2 < iscore)
2225 return score2;
2226 }
115329f1 2227
155ec6ed 2228 if(iscore < score){
28869757 2229 memcpy(pbbak, i_buffer, i_len);
155ec6ed 2230 s->c= ic;
28869757
MN
2231 s->c.bytestream_start= pbbak_start;
2232 s->c.bytestream= pbbak + i_len;
155ec6ed
MN
2233 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
2234 memcpy(s->block_state, i_state, sizeof(s->block_state));
2235 return iscore;
2236 }else{
28869757 2237 memcpy(pbbak, p_buffer, p_len);
155ec6ed 2238 s->c= pc;
28869757
MN
2239 s->c.bytestream_start= pbbak_start;
2240 s->c.bytestream= pbbak + p_len;
155ec6ed
MN
2241 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
2242 memcpy(s->block_state, p_state, sizeof(s->block_state));
2243 return score;
2244 }
2245}
2246
51d6a3cf
MN
2247static always_inline int same_block(BlockNode *a, BlockNode *b){
2248 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2249 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2250 }else{
2251 return !((a->mx - b->mx) | (a->my - b->my) | ((a->type ^ b->type)&BLOCK_INTRA));
2252 }
2253}
2254
2255static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2256 const int w= s->b_width << s->block_max_depth;
2257 const int rem_depth= s->block_max_depth - level;
2258 const int index= (x + y*w) << rem_depth;
2259 int trx= (x+1)<<rem_depth;
2260 BlockNode *b= &s->block[index];
2261 BlockNode *left = x ? &s->block[index-1] : &null_block;
2262 BlockNode *top = y ? &s->block[index-w] : &null_block;
2263 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2264 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2265 int pl = left->color[0];
2266 int pcb= left->color[1];
2267 int pcr= left->color[2];
2268 int pmx= mid_pred(left->mx, top->mx, tr->mx);
2269 int pmy= mid_pred(left->my, top->my, tr->my);
2270 int mx_context= av_log2(2*ABS(left->mx - top->mx));
2271 int my_context= av_log2(2*ABS(left->my - top->my));
2272 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2273
2274 if(s->keyframe){
2275 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2276 return;
2277 }
2278
2279 if(level!=s->block_max_depth){
2280 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
d6f41eed
MN
2281 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2282 }else{
51d6a3cf
MN
2283 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2284 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2285 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2286 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2287 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2288 return;
51d6a3cf
MN
2289 }
2290 }
2291 if(b->type & BLOCK_INTRA){
2292 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2293 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2294 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2295 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2296 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, BLOCK_INTRA);
2297 }else{
2298 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2299 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2300 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2301 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, 0);
2302 }
2303}
2304
155ec6ed
MN
2305static void decode_q_branch(SnowContext *s, int level, int x, int y){
2306 const int w= s->b_width << s->block_max_depth;
155ec6ed
MN
2307 const int rem_depth= s->block_max_depth - level;
2308 const int index= (x + y*w) << rem_depth;
155ec6ed 2309 int trx= (x+1)<<rem_depth;
155ec6ed
MN
2310 BlockNode *left = x ? &s->block[index-1] : &null_block;
2311 BlockNode *top = y ? &s->block[index-w] : &null_block;
2312 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2313 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2314 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
115329f1 2315
155ec6ed
MN
2316 if(s->keyframe){
2317 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
2318 return;
2319 }
2320
28869757 2321 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
155ec6ed
MN
2322 int type;
2323 int l = left->color[0];
2324 int cb= left->color[1];
2325 int cr= left->color[2];
2326 int mx= mid_pred(left->mx, top->mx, tr->mx);
2327 int my= mid_pred(left->my, top->my, tr->my);
2328 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
2329 int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
115329f1 2330
28869757 2331 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
155ec6ed
MN
2332
2333 if(type){
2334 l += get_symbol(&s->c, &s->block_state[32], 1);
2335 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2336 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2337 }else{
2338 mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
2339 my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
2340 }
2341 set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
2342 }else{
2343 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2344 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2345 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2346 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2347 }
2348}
2349
2350static void encode_blocks(SnowContext *s){
2351 int x, y;
2352 int w= s->b_width;
2353 int h= s->b_height;
2354
51d6a3cf
MN
2355 if(s->avctx->me_method == ME_ITER && !s->keyframe)
2356 iterative_me(s);
2357
155ec6ed 2358 for(y=0; y<h; y++){
d06c75a8 2359 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
0ecca7a4
MN
2360 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2361 return;
2362 }
155ec6ed 2363 for(x=0; x<w; x++){
51d6a3cf
MN
2364 if(s->avctx->me_method == ME_ITER)
2365 encode_q_branch2(s, 0, x, y);
2366 else
2367 encode_q_branch (s, 0, x, y);
155ec6ed
MN
2368 }
2369 }
2370}
2371
2372static void decode_blocks(SnowContext *s){
2373 int x, y;
2374 int w= s->b_width;
2375 int h= s->b_height;
2376
2377 for(y=0; y<h; y++){
2378 for(x=0; x<w; x++){
2379 decode_q_branch(s, 0, x, y);
2380 }
2381 }
791e7b83
MN
2382}
2383
2384static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2385 int x, y;
3924dac4 2386START_TIMER
791e7b83
MN
2387 for(y=0; y < b_h+5; y++){
2388 for(x=0; x < b_w; x++){
3924dac4
MN
2389 int a0= src[x ];
2390 int a1= src[x + 1];
2391 int a2= src[x + 2];
2392 int a3= src[x + 3];
2393 int a4= src[x + 4];
2394 int a5= src[x + 5];
791e7b83
MN
2395// int am= 9*(a1+a2) - (a0+a3);
2396 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2397// int am= 18*(a2+a3) - 2*(a1+a4);
2398// int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2399// int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2400
2401// if(b_w==16) am= 8*(a1+a2);
2402
8c2515bb
Y
2403 if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
2404 else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
115329f1 2405
8c2515bb
Y
2406 /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
2407 if(am&(~255)) am= ~(am>>31);
115329f1 2408
8c2515bb 2409 tmp[x] = am;
791e7b83
MN
2410
2411/* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
2412 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
2413 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
2414 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2415 }
3924dac4
MN
2416 tmp += stride;
2417 src += stride;
791e7b83 2418 }
3924dac4 2419 tmp -= (b_h+5)*stride;
115329f1 2420
791e7b83
MN
2421 for(y=0; y < b_h; y++){
2422 for(x=0; x < b_w; x++){
3924dac4
MN
2423 int a0= tmp[x + 0*stride];
2424 int a1= tmp[x + 1*stride];
2425 int a2= tmp[x + 2*stride];
2426 int a3= tmp[x + 3*stride];
2427 int a4= tmp[x + 4*stride];
2428 int a5= tmp[x + 5*stride];
791e7b83
MN
2429 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2430// int am= 18*(a2+a3) - 2*(a1+a4);
2431/* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2432 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
115329f1 2433
791e7b83
MN
2434// if(b_w==16) am= 8*(a1+a2);
2435
8c2515bb
Y
2436 if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
2437 else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
791e7b83 2438
8c2515bb 2439 if(am&(~255)) am= ~(am>>31);
115329f1 2440
8c2515bb 2441 dst[x] = am;
791e7b83
MN
2442/* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
2443 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
2444 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
2445 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2446 }
3924dac4
MN
2447 dst += stride;
2448 tmp += stride;
791e7b83 2449 }
3924dac4 2450STOP_TIMER("mc_block")
791e7b83
MN
2451}
2452
/**
 * Defines mc_block_hpel<dx><dy><b_w>(dst, src, stride, h): a wrapper that
 * runs mc_block() on a square b_w x b_w block with fixed fractional offsets
 * dx, dy. The source pointer is moved 2 pixels left and 2 rows up before
 * the call, and the scratch buffer is a local array of b_w+5 rows.
 * h must be equal to b_w.
 */
#define mca(dx,dy,b_w)\
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
    uint8_t scratch[stride*(b_w+5)];\
    uint8_t *const top_left= src - 2 - 2*stride;\
    assert(h==b_w);\
    mc_block(dst, top_left, scratch, stride, b_w, b_w, dx, dy);\
}

mca( 0, 0,16)
mca( 8, 0,16)
mca( 0, 8,16)
mca( 8, 8,16)
mca( 0, 0,8)
mca( 8, 0,8)
mca( 0, 8,8)
mca( 8, 8,8)
791e7b83 2468
ff158dc9 2469static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
51d6a3cf 2470 if(block->type & BLOCK_INTRA){
ff158dc9 2471 int x, y;
2692ceab
MN
2472 const int color = block->color[plane_index];
2473 const int color4= color*0x01010101;
2474 if(b_w==16){
2475 for(y=0; y < b_h; y++){
2476 *(uint32_t*)&dst[0 + y*stride]= color4;
2477 *(uint32_t*)&dst[4 + y*stride]= color4;
2478 *(uint32_t*)&dst[8 + y*stride]= color4;
2479 *(uint32_t*)&dst[12+ y*stride]= color4;
2480 }
2481 }else if(b_w==8){
2482 for(y=0; y < b_h; y++){
2483 *(uint32_t*)&dst[0 + y*stride]= color4;
2484 *(uint32_t*)&dst[4 + y*stride]= color4;
2485 }
2486 }else if(b_w==4){
2487 for(y=0; y < b_h; y++){
2488 *(uint32_t*)&dst[0 + y*stride]= color4;
2489 }
2490 }else{
2491 for(y=0; y < b_h; y++){
2492 for(x=0; x < b_w; x++){
2493 dst[x + y*stride]= color;
2494 }
ff158dc9
MN
2495 }
2496 }
2497 }else{
2498 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2499 int mx= block->mx*scale;
2500 int my= block->my*scale;
ec697587
MN
2501 const int dx= mx&15;
2502 const int dy= my&15;
80e44bc3 2503 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
ff158dc9
MN
2504 sx += (mx>>4) - 2;
2505 sy += (my>>4) - 2;
2506 src += sx + sy*stride;
2507 if( (unsigned)sx >= w - b_w - 4
2508 || (unsigned)sy >= h - b_h - 4){
2509 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2510 src= tmp + MB_SIZE;
2511 }
2692ceab
MN
2512 assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2513 assert(!(b_w&(b_w-1)));
2514 assert(b_w>1 && b_h>1);
80e44bc3
MN
2515 assert(tab_index>=0 && tab_index<4);
2516 if((dx&3) || (dy&3))
ec697587 2517 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2692ceab 2518 else if(b_w==b_h)
80e44bc3 2519 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2692ceab 2520 else if(b_w==2*b_h){
80e44bc3
MN
2521 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride);
2522 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
2692ceab
MN
2523 }else{
2524 assert(2*b_w==b_h);
80e44bc3
MN
2525 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride);
2526 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
2692ceab 2527 }
ff158dc9
MN
2528 }
2529}
2530
ff158dc9 2531//FIXME name clenup (b_w, block_w, b_width stuff)
a0d1931c
Y
2532static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2533 DWTELEM * dst = NULL;
2534 const int b_width = s->b_width << s->block_max_depth;
2535 const int b_height= s->b_height << s->block_max_depth;
2536 const int b_stride= b_width;
2537 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2538 BlockNode *rt= lt+1;
2539 BlockNode *lb= lt+b_stride;
2540 BlockNode *rb= lb+1;
115329f1 2541 uint8_t *block[4];
cc884a35
MN
2542 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2543 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2544 uint8_t *ptmp;
a0d1931c
Y
2545 int x,y;
2546
2547 if(b_x<0){
2548 lt= rt;
2549 lb= rb;
2550 }else if(b_x + 1 >= b_width){
2551 rt= lt;
2552 rb= lb;
2553 }
2554 if(b_y<0){
2555 lt= lb;
2556 rt= rb;
2557 }else if(b_y + 1 >= b_height){
2558 lb= lt;
2559 rb= rt;
2560 }
115329f1 2561
a0d1931c
Y
2562 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2563 obmc -= src_x;
2564 b_w += src_x;
2565 src_x=0;
2566 }else if(src_x + b_w > w){
2567 b_w = w - src_x;
2568 }
2569 if(src_y<0){
2570 obmc -= src_y*obmc_stride;
2571 b_h += src_y;
2572 src_y=0;
2573 }else if(src_y + b_h> h){
2574 b_h = h - src_y;
2575 }
115329f1 2576
a0d1931c
Y
2577 if(b_w<=0 || b_h<=0) return;
2578
cc884a35 2579assert(src_stride > 2*MB_SIZE + 5);
a0d1931c
Y
2580// old_dst += src_x + src_y*dst_stride;
2581 dst8+= src_x + src_y*src_stride;
2582// src += src_x + src_y*src_stride;
2583
cc884a35
MN
2584 ptmp= tmp + 3*tmp_step;
2585 block[0]= ptmp;
2586 ptmp+=tmp_step;
115329f1 2587 pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
a0d1931c
Y
2588
2589 if(same_block(lt, rt)){
2590 block[1]= block[0];
2591 }else{
cc884a35
MN
2592 block[1]= ptmp;
2593 ptmp+=tmp_step;
a0d1931c
Y
2594 pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2595 }
115329f1 2596
a0d1931c
Y
2597 if(same_block(lt, lb)){
2598 block[2]= block[0];
2599 }else if(same_block(rt, lb)){
2600 block[2]= block[1];
2601 }else{
cc884a35
MN
2602 block[2]= ptmp;
2603 ptmp+=tmp_step;
a0d1931c
Y
2604 pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2605 }
2606
2607 if(same_block(lt, rb) ){
2608 block[3]= block[0];
2609 }else if(same_block(rt, rb)){
2610 block[3]= block[1];
2611 }else if(same_block(lb, rb)){
2612 block[3]= block[2];
2613 }else{
cc884a35 2614 block[3]= ptmp;
a0d1931c
Y
2615 pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2616 }
2617#if 0
2618 for(y=0; y<b_h; y++){
2619 for(x=0; x<b_w; x++){
2620 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2621 if(add) dst[x + y*dst_stride] += v;
2622 else dst[x + y*dst_stride] -= v;
2623 }
2624 }
2625 for(y=0; y<b_h; y++){
2626 uint8_t *obmc2= obmc + (obmc_stride>>1);
2627 for(x=0; x<b_w; x++){
2628 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2629 if(add) dst[x + y*dst_stride] += v;
2630 else dst[x + y*dst_stride] -= v;
2631 }
2632 }
2633 for(y=0; y<b_h; y++){
2634 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2635 for(x=0; x<b_w; x++){
2636 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2637 if(add) dst[x + y*dst_stride] += v;
2638 else dst[x + y*dst_stride] -= v;
2639 }
2640 }
2641 for(y=0; y<b_h; y++){
2642 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2643 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2644 for(x=0; x<b_w; x++){
2645 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2646 if(add) dst[x + y*dst_stride] += v;
2647 else dst[x + y*dst_stride] -= v;
2648 }
2649 }
2650#else
2651{
2652
2653 START_TIMER
115329f1 2654
a0d1931c
Y
2655 for(y=0; y<b_h; y++){
2656 //FIXME ugly missue of obmc_stride
2657 uint8_t *obmc1= obmc + y*obmc_stride;
2658 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2659 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2660 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2661 dst = slice_buffer_get_line(sb, src_y + y);
2662 for(x=0; x<b_w; x++){
2663 int v= obmc1[x] * block[3][x + y*src_stride]
2664 +obmc2[x] * block[2][x + y*src_stride]
2665 +obmc3[x] * block[1][x + y*src_stride]
2666 +obmc4[x] * block[0][x + y*src_stride];
2667
2668 v <<= 8 - LOG2_OBMC_MAX;
2669 if(FRAC_BITS != 8){
2670 v += 1<<(7 - FRAC_BITS);
2671 v >>= 8 - FRAC_BITS;
2672 }
2673 if(add){
2674// v += old_dst[x + y*dst_stride];
2675 v += dst[x + src_x];
2676 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2677 if(v&(~255)) v= ~(v>>31);
2678 dst8[x + y*src_stride] = v;
2679 }else{
2680// old_dst[x + y*dst_stride] -= v;
2681 dst[x + src_x] -= v;
2682 }
2683 }
2684 }
2685 STOP_TIMER("Inner add y block")
2686}
2687#endif
2688}
2689
2690//FIXME name cleanup (b_w, block_w, b_width stuff)
715a97f0 2691static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
ff158dc9
MN
2692 const int b_width = s->b_width << s->block_max_depth;
2693 const int b_height= s->b_height << s->block_max_depth;
2694 const int b_stride= b_width;
2695 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2696 BlockNode *rt= lt+1;
2697 BlockNode *lb= lt+b_stride;
2698 BlockNode *rb= lb+1;
115329f1 2699 uint8_t *block[4];
cc884a35
MN
2700 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2701 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2702 uint8_t *ptmp;
791e7b83
MN
2703 int x,y;
2704
ff158dc9
MN
2705 if(b_x<0){
2706 lt= rt;
2707 lb= rb;
2708 }else if(b_x + 1 >= b_width){
2709 rt= lt;
2710 rb= lb;
791e7b83 2711 }
ff158dc9
MN
2712 if(b_y<0){
2713 lt= lb;
2714 rt= rb;
2715 }else if(b_y + 1 >= b_height){
2716 lb= lt;
2717 rb= rt;
2718 }
115329f1 2719
ff158dc9
MN
2720 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2721 obmc -= src_x;
2722 b_w += src_x;
2723 src_x=0;
2724 }else if(src_x + b_w > w){
2725 b_w = w - src_x;
2726 }
2727 if(src_y<0){
2728 obmc -= src_y*obmc_stride;
2729 b_h += src_y;
2730 src_y=0;
2731 }else if(src_y + b_h> h){
2732 b_h = h - src_y;
791e7b83 2733 }
115329f1 2734
ff158dc9 2735 if(b_w<=0 || b_h<=0) return;
155ec6ed 2736
cc884a35 2737assert(src_stride > 2*MB_SIZE + 5);
ff158dc9 2738 dst += src_x + src_y*dst_stride;
715a97f0 2739 dst8+= src_x + src_y*src_stride;
ff158dc9
MN
2740// src += src_x + src_y*src_stride;
2741
cc884a35
MN
2742 ptmp= tmp + 3*tmp_step;
2743 block[0]= ptmp;
2744 ptmp+=tmp_step;
115329f1 2745 pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
ff158dc9
MN
2746
2747 if(same_block(lt, rt)){
2748 block[1]= block[0];
791e7b83 2749 }else{
cc884a35
MN
2750 block[1]= ptmp;
2751 ptmp+=tmp_step;
ff158dc9
MN
2752 pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2753 }
115329f1 2754
ff158dc9
MN
2755 if(same_block(lt, lb)){
2756 block[2]= block[0];
2757 }else if(same_block(rt, lb)){
2758 block[2]= block[1];
2759 }else{
cc884a35
MN
2760 block[2]= ptmp;
2761 ptmp+=tmp_step;
ff158dc9
MN
2762 pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2763 }
791e7b83 2764
ff158dc9
MN
2765 if(same_block(lt, rb) ){
2766 block[3]= block[0];
2767 }else if(same_block(rt, rb)){
2768 block[3]= block[1];
2769 }else if(same_block(lb, rb)){
2770 block[3]= block[2];
2771 }else{
cc884a35 2772 block[3]= ptmp;
ff158dc9
MN
2773 pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2774 }
2775#if 0
2776 for(y=0; y<b_h; y++){
2777 for(x=0; x<b_w; x++){
2778 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2779 if(add) dst[x + y*dst_stride] += v;
2780 else dst[x + y*dst_stride] -= v;
2781 }
2782 }
2783 for(y=0; y<b_h; y++){
2784 uint8_t *obmc2= obmc + (obmc_stride>>1);
2785 for(x=0; x<b_w; x++){
2786 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2787 if(add) dst[x + y*dst_stride] += v;
2788 else dst[x + y*dst_stride] -= v;
2789 }
2790 }
2791 for(y=0; y<b_h; y++){
2792 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2793 for(x=0; x<b_w; x++){
2794 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2795 if(add) dst[x + y*dst_stride] += v;
2796 else dst[x + y*dst_stride] -= v;
2797 }
2798 }
2799 for(y=0; y<b_h; y++){
2800 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2801 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2802 for(x=0; x<b_w; x++){
2803 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2804 if(add) dst[x + y*dst_stride] += v;
2805 else dst[x + y*dst_stride] -= v;
2806 }
2807 }
2808#else
2809 for(y=0; y<b_h; y++){
2810 //FIXME ugly missue of obmc_stride
2811 uint8_t *obmc1= obmc + y*obmc_stride;
2812 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2813 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2814 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2815 for(x=0; x<b_w; x++){
2816 int v= obmc1[x] * block[3][x + y*src_stride]
2817 +obmc2[x] * block[2][x + y*src_stride]
2818 +obmc3[x] * block[1][x + y*src_stride]
2819 +obmc4[x] * block[0][x + y*src_stride];
115329f1 2820
715a97f0 2821 v <<= 8 - LOG2_OBMC_MAX;
034aff03
MN
2822 if(FRAC_BITS != 8){
2823 v += 1<<(7 - FRAC_BITS);
2824 v >>= 8 - FRAC_BITS;
2825 }
715a97f0
MN
2826 if(add){
2827 v += dst[x + y*dst_stride];
2828 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2829 if(v&(~255)) v= ~(v>>31);
2830 dst8[x + y*src_stride] = v;
2831 }else{
2832 dst[x + y*dst_stride] -= v;
2833 }
791e7b83
MN
2834 }
2835 }
ff158dc9 2836#endif
791e7b83
MN
2837}
2838
a0d1931c
Y
2839static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2840 Plane *p= &s->plane[plane_index];
2841 const int mb_w= s->b_width << s->block_max_depth;
2842 const int mb_h= s->b_height << s->block_max_depth;
2843 int x, y, mb_x;
2844 int block_size = MB_SIZE >> s->block_max_depth;
2845 int block_w = plane_index ? block_size/2 : block_size;
2846 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2847 int obmc_stride= plane_index ? block_size : 2*block_size;
2848 int ref_stride= s->current_picture.linesize[plane_index];
2849 uint8_t *ref = s->last_picture.data[plane_index];
2850 uint8_t *dst8= s->current_picture.data[plane_index];
2851 int w= p->width;
2852 int h= p->height;
2853 START_TIMER
115329f1 2854
a0d1931c
Y
2855 if(s->keyframe || (s->avctx->debug&512)){
2856 if(mb_y==mb_h)
2857 return;
2858
2859 if(add){
86e59cc0 2860 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
a0d1931c
Y
2861 {
2862// DWTELEM * line = slice_buffer_get_line(sb, y);
2863 DWTELEM * line = sb->line[y];
2864 for(x=0; x<w; x++)
2865 {
2866// int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2867 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2868 v >>= FRAC_BITS;
2869 if(v&(~255)) v= ~(v>>31);
2870 dst8[x + y*ref_stride]= v;
2871 }
2872 }
2873 }else{
86e59cc0 2874 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
a0d1931c
Y
2875 {
2876// DWTELEM * line = slice_buffer_get_line(sb, y);
2877 DWTELEM * line = sb->line[y];
2878 for(x=0; x<w; x++)
2879 {
2880 line[x] -= 128 << FRAC_BITS;
2881// buf[x + y*w]-= 128<<FRAC_BITS;
2882 }
2883 }
2884 }
2885
2886 return;
2887 }
115329f1 2888
a0d1931c
Y
2889 for(mb_x=0; mb_x<=mb_w; mb_x++){
2890 START_TIMER
2891
115329f1 2892 add_yblock_buffered(s, sb, old_buffer, dst8, ref, obmc,
a0d1931c
Y
2893 block_w*mb_x - block_w/2,
2894 block_w*mb_y - block_w/2,
2895 block_w, block_w,
2896 w, h,
2897 w, ref_stride, obmc_stride,
2898 mb_x - 1, mb_y - 1,
2899 add, plane_index);
115329f1 2900
a0d1931c
Y
2901 STOP_TIMER("add_yblock")
2902 }
115329f1 2903
a0d1931c
Y
2904 STOP_TIMER("predict_slice")
2905}
2906
f9e6ebf7 2907static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
791e7b83 2908 Plane *p= &s->plane[plane_index];
155ec6ed
MN
2909 const int mb_w= s->b_width << s->block_max_depth;
2910 const int mb_h= s->b_height << s->block_max_depth;
f9e6ebf7 2911 int x, y, mb_x;
155ec6ed
MN
2912 int block_size = MB_SIZE >> s->block_max_depth;
2913 int block_w = plane_index ? block_size/2 : block_size;
ff158dc9 2914 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
51d6a3cf 2915 const int obmc_stride= plane_index ? block_size : 2*block_size;
715a97f0 2916 int ref_stride= s->current_picture.linesize[plane_index];
791e7b83 2917 uint8_t *ref = s->last_picture.data[plane_index];
715a97f0 2918 uint8_t *dst8= s->current_picture.data[plane_index];
791e7b83
MN
2919 int w= p->width;
2920 int h= p->height;
fff6d4ea 2921 START_TIMER
115329f1 2922
ff158dc9 2923 if(s->keyframe || (s->avctx->debug&512)){
f9e6ebf7
LM
2924 if(mb_y==mb_h)
2925 return;
2926
715a97f0 2927 if(add){
86e59cc0 2928 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
715a97f0
MN
2929 for(x=0; x<w; x++){
2930 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2931 v >>= FRAC_BITS;
2932 if(v&(~255)) v= ~(v>>31);
2933 dst8[x + y*ref_stride]= v;
2934 }
2935 }
2936 }else{
86e59cc0 2937 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
715a97f0
MN
2938 for(x=0; x<w; x++){
2939 buf[x + y*w]-= 128<<FRAC_BITS;
2940 }
ff158dc9 2941 }
791e7b83 2942 }
ff158dc9
MN
2943
2944 return;
791e7b83 2945 }
115329f1 2946
ff158dc9 2947 for(mb_x=0; mb_x<=mb_w; mb_x++){
fff6d4ea 2948 START_TIMER
ff158dc9 2949
115329f1 2950 add_yblock(s, buf, dst8, ref, obmc,
ff158dc9 2951 block_w*mb_x - block_w/2,
791e7b83 2952 block_w*mb_y - block_w/2,
ff158dc9 2953 block_w, block_w,
791e7b83 2954 w, h,
ff158dc9
MN
2955 w, ref_stride, obmc_stride,
2956 mb_x - 1, mb_y - 1,
2957 add, plane_index);
115329f1 2958
ff158dc9 2959 STOP_TIMER("add_yblock")
791e7b83 2960 }
115329f1 2961
f9e6ebf7
LM
2962 STOP_TIMER("predict_slice")
2963}
2964
2965static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2966 const int mb_h= s->b_height << s->block_max_depth;
2967 int mb_y;
2968 for(mb_y=0; mb_y<=mb_h; mb_y++)
2969 predict_slice(s, buf, plane_index, add, mb_y);
791e7b83
MN
2970}
2971
51d6a3cf
MN
2972static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2973 int i, x2, y2;
2974 Plane *p= &s->plane[plane_index];
2975 const int block_size = MB_SIZE >> s->block_max_depth;
2976 const int block_w = plane_index ? block_size/2 : block_size;
2977 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2978 const int obmc_stride= plane_index ? block_size : 2*block_size;
2979 const int ref_stride= s->current_picture.linesize[plane_index];
2980 uint8_t *ref= s-> last_picture.data[plane_index];
2981 uint8_t *dst= s->current_picture.data[plane_index];
2982 uint8_t *src= s-> input_picture.data[plane_index];
2983 const static DWTELEM zero_dst[4096]; //FIXME
2984 const int b_stride = s->b_width << s->block_max_depth;
2985 const int w= p->width;
2986 const int h= p->height;
2987 int index= mb_x + mb_y*b_stride;
2988 BlockNode *b= &s->block[index];
2989 BlockNode backup= *b;
2990 int ab=0;
2991 int aa=0;
2992
2993 b->type|= BLOCK_INTRA;
2994 b->color[plane_index]= 0;
2995
2996 for(i=0; i<4; i++){
2997 int mb_x2= mb_x + (i &1) - 1;
2998 int mb_y2= mb_y + (i>>1) - 1;
2999 int x= block_w*mb_x2 + block_w/2;
3000 int y= block_w*mb_y2 + block_w/2;
3001
3002 add_yblock(s, zero_dst, dst, ref, obmc,
3003 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, plane_index);
3004
3005 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
3006 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
3007 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
3008 int obmc_v= obmc[index];
3009 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
3010 if(x<0) obmc_v += obmc[index + block_w];
3011 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
3012 if(x+block_w>w) obmc_v += obmc[index - block_w];
3013 //FIXME precalc this or simplify it somehow else
3014
3015 ab += (src[x2 + y2*ref_stride] - dst[x2 + y2*ref_stride]) * obmc_v;
3016 aa += obmc_v * obmc_v; //FIXME precalclate this
3017 }
3018 }
3019 }
3020 *b= backup;
3021
3022 return clip(((ab<<6) + aa/2)/aa, 0, 255); //FIXME we shouldnt need cliping
3023}
3024
3025static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
3026 int i, y2;
3027 Plane *p= &s->plane[plane_index];
3028 const int block_size = MB_SIZE >> s->block_max_depth;
3029 const int block_w = plane_index ? block_size/2 : block_size;
3030 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3031 const int obmc_stride= plane_index ? block_size : 2*block_size;
3032 const int ref_stride= s->current_picture.linesize[plane_index];
3033 uint8_t *ref= s-> last_picture.data[plane_index];
3034 uint8_t *dst= s->current_picture.data[plane_index];
3035 uint8_t *src= s-> input_picture.data[plane_index];
3036 const static DWTELEM zero_dst[4096]; //FIXME
3037 const int b_stride = s->b_width << s->block_max_depth;
3038 const int b_height = s->b_height<< s->block_max_depth;
3039 const int w= p->width;
3040 const int h= p->height;
3041 int distortion= 0;
3042 int rate= 0;
3043 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
3044
3045 for(i=0; i<4; i++){
3046 int mb_x2= mb_x + (i &1) - 1;
3047 int mb_y2= mb_y + (i>>1) - 1;
3048 int x= block_w*mb_x2 + block_w/2;
3049 int y= block_w*mb_y2 + block_w/2;
3050
3051 add_yblock(s, zero_dst, dst, ref, obmc,
3052 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, plane_index);
3053
3054 //FIXME find a cleaner/simpler way to skip the outside stuff
3055 for(y2= y; y2<0; y2++)
3056 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3057 for(y2= h; y2<y+block_w; y2++)
3058 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3059 if(x<0){
3060 for(y2= y; y2<y+block_w; y2++)
3061 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
3062 }
3063 if(x+block_w > w){
3064 for(y2= y; y2<y+block_w; y2++)
3065 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
3066 }
3067
3068 assert(block_w== 8 || block_w==16);
3069 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
3070 }
3071
3072 if(plane_index==0){
3073 for(i=0; i<4; i++){
3074/* ..RRr
3075 * .RXx.
3076 * rxx..
3077 */
3078 int x= mb_x + (i&1) - (i>>1);
3079 int y= mb_y + (i>>1);
3080 int index= x + y*b_stride;
3081 BlockNode *b = &s->block[index];
3082 BlockNode *left = x ? &s->block[index-1] : &null_block;
3083 BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
3084 BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
3085 BlockNode *tr = y && x+1<b_stride ? &s->block[index-b_stride+1] : tl;
3086 int dmx= b->mx - mid_pred(left->mx, top->mx, tr->mx);
3087 int dmy= b->my - mid_pred(left->my, top->my, tr->my);
3088// int mx_context= av_log2(2*ABS(left->mx - top->mx));
3089// int my_context= av_log2(2*ABS(left->my - top->my));
3090
3091 if(x<0 || x>=b_stride || y>=b_height)
3092 continue;
3093/*
30941 0 0
309501X 1-2 1
3096001XX 3-6 2-3
30970001XXX 7-14 4-7
309800001XXXX 15-30 8-15
3099*/
3100//FIXME try accurate rate
3101//FIXME intra and inter predictors if surrounding blocks arent the same type
3102 if(b->type & BLOCK_INTRA){
3103 rate += 3+2*( av_log2(2*ABS(left->color[0] - b->color[0]))
3104 + av_log2(2*ABS(left->color[1] - b->color[1]))
3105 + av_log2(2*ABS(left->color[2] - b->color[2])));
3106 }else
3107 rate += 2*(1 + av_log2(2*ABS(dmx))
3108 + av_log2(2*ABS(dmy))); //FIXME kill the 2* can be merged in lambda
3109 }
3110 }
3111
3112 return distortion + rate*penalty_factor;
3113}
3114
3115static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, int *best_rd){
3116 const int b_stride= s->b_width << s->block_max_depth;
3117 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3118 BlockNode backup= *block;
3119 int rd, index, value;
3120
3121 assert(mb_x>=0 && mb_y>=0);
735f9f34 3122 assert(mb_x<b_stride);
51d6a3cf
MN
3123
3124 if(intra){
3125 block->color[0] = p[0];
3126 block->color[1] = p[1];
3127 block->color[2] = p[2];
3128 block->type |= BLOCK_INTRA;
3129 }else{
3130 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3131 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6);
3132 if(s->me_cache[index] == value)
3133 return 0;
3134 s->me_cache[index]= value;
3135
3136 block->mx= p[0];
3137 block->my= p[1];
3138 block->type &= ~BLOCK_INTRA;
3139 }
3140
3141 rd= get_block_rd(s, mb_x, mb_y, 0);
3142
3143//FIXME chroma
3144 if(rd < *best_rd){
3145 *best_rd= rd;
3146 return 1;
3147 }else{
3148 *block= backup;
3149 return 0;
3150 }
3151}
3152
52137f2f 3153/* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
003cd80d 3154static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int intra, int *best_rd){
52137f2f
FR
3155 int p[2] = {p0, p1};
3156 return check_block(s, mb_x, mb_y, p, intra, best_rd);
3157}
3158
51d6a3cf
MN
3159static void iterative_me(SnowContext *s){
3160 int pass, mb_x, mb_y;
3161 const int b_width = s->b_width << s->block_max_depth;
3162 const int b_height= s->b_height << s->block_max_depth;
3163 const int b_stride= b_width;
3164 int color[3];
3165
3166 for(pass=0; pass<50; pass++){
3167 int change= 0;
3168
3169 for(mb_y= 0; mb_y<b_height; mb_y++){
3170 for(mb_x= 0; mb_x<b_width; mb_x++){
3171 int dia_change, i, j;
3172 int best_rd= INT_MAX;
3173 BlockNode backup;
3174 const int index= mb_x + mb_y * b_stride;
3175 BlockNode *block= &s->block[index];
3176 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : &null_block;
3177 BlockNode *lb = mb_x ? &s->block[index -1] : &null_block;
3178 BlockNode *rb = mb_x<b_width ? &s->block[index +1] : &null_block;
3179 BlockNode *bb = mb_y<b_height ? &s->block[index+b_stride ] : &null_block;
3180 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : &null_block;
3181 BlockNode *trb= mb_x<b_width && mb_y ? &s->block[index-b_stride+1] : &null_block;
3182 BlockNode *blb= mb_x && mb_y<b_height ? &s->block[index+b_stride-1] : &null_block;
3183 BlockNode *brb= mb_x<b_width && mb_y<b_height ? &s->block[index+b_stride+1] : &null_block;
3184
3185 if(pass && (block->type & BLOCK_OPT))
3186 continue;
3187 block->type |= BLOCK_OPT;
3188
3189 backup= *block;
3190
3191 if(!s->me_cache_generation)
3192 memset(s->me_cache, 0, sizeof(s->me_cache));
3193 s->me_cache_generation += 1<<22;
3194
3195 // get previous score (cant be cached due to OBMC)
003cd80d
FR
3196 check_block_inter(s, mb_x, mb_y, block->mx, block->my, 0, &best_rd);
3197 check_block_inter(s, mb_x, mb_y, 0, 0, 0, &best_rd);
3198 check_block_inter(s, mb_x, mb_y, tb->mx, tb->my, 0, &best_rd);
3199 check_block_inter(s, mb_x, mb_y, lb->mx, lb->my, 0, &best_rd);
3200 check_block_inter(s, mb_x, mb_y, rb->mx, rb->my, 0, &best_rd);
3201 check_block_inter(s, mb_x, mb_y, bb->mx, bb->my, 0, &best_rd);
51d6a3cf
MN
3202
3203 /* fullpel ME */
3204 //FIXME avoid subpel interpol / round to nearest integer
3205 do{
3206 dia_change=0;
3207 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3208 for(j=0; j<i; j++){
003cd80d
FR
3209 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), 0, &best_rd);
3210 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), 0, &best_rd);
3211 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), 0, &best_rd);
3212 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), 0, &best_rd);
51d6a3cf
MN
3213 }
3214 }
3215 }while(dia_change);
3216 /* subpel ME */
3217 do{
3218 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3219 dia_change=0;
3220 for(i=0; i<8; i++)
003cd80d 3221 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], 0, &best_rd);
51d6a3cf
MN
3222 }while(dia_change);
3223 //FIXME or try the standard 2 pass qpel or similar
13705b69 3224#if 1
51d6a3cf
MN
3225 for(i=0; i<3; i++){
3226 color[i]= get_dc(s, mb_x, mb_y, i);
3227 }
3228 check_block(s, mb_x, mb_y, color, 1, &best_rd);
3229 //FIXME RD style color selection
13705b69 3230#endif
51d6a3cf
MN
3231 if(!same_block(block, &backup)){
3232 if(tb != &null_block) tb ->type &= ~BLOCK_OPT;
3233 if(lb != &null_block) lb ->type &= ~BLOCK_OPT;
3234 if(rb != &null_block) rb ->type &= ~BLOCK_OPT;
3235 if(bb != &null_block) bb ->type &= ~BLOCK_OPT;
3236 if(tlb!= &null_block) tlb->type &= ~BLOCK_OPT;
3237 if(trb!= &null_block) trb->type &= ~BLOCK_OPT;
3238 if(blb!= &null_block) blb->type &= ~BLOCK_OPT;
3239 if(brb!= &null_block) brb->type &= ~BLOCK_OPT;
3240 change ++;
3241 }
3242 }
3243 }
3244 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3245 if(!change)
3246 break;
3247 }
3248}
3249
791e7b83
MN
3250static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
3251 const int level= b->level;
3252 const int w= b->width;
3253 const int h= b->height;
c97de57c
MN
3254 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3255 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
da66b631 3256 int x,y, thres1, thres2;
62ab0b78 3257// START_TIMER
791e7b83 3258
93fbdb5a 3259 if(s->qlog == LOSSLESS_QLOG) return;
115329f1 3260
791e7b83 3261 bias= bias ? 0 : (3*qmul)>>3;
da66b631
MN
3262 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3263 thres2= 2*thres1;
115329f1 3264
791e7b83
MN
3265 if(!bias){
3266 for(y=0; y<h; y++){
3267 for(x=0; x<w; x++){
da66b631 3268 int i= src[x + y*stride];
115329f1 3269
da66b631
MN
3270 if((unsigned)(i+thres1) > thres2){
3271 if(i>=0){
3272 i<<= QEXPSHIFT;
3273 i/= qmul; //FIXME optimize
3274 src[x + y*stride]= i;
3275 }else{
3276 i= -i;
3277 i<<= QEXPSHIFT;
3278 i/= qmul; //FIXME optimize
3279 src[x + y*stride]= -i;
3280 }
3281 }else
3282 src[x + y*stride]= 0;
791e7b83
MN
3283 }
3284 }
3285 }else{
3286 for(y=0; y<h; y++){
3287 for(x=0; x<w; x++){
115329f1
DB
3288 int i= src[x + y*stride];
3289
da66b631
MN
3290 if((unsigned)(i+thres1) > thres2){
3291 if(i>=0){
3292 i<<= QEXPSHIFT;
3293 i= (i + bias) / qmul; //FIXME optimize
3294 src[x + y*stride]= i;
3295 }else{
3296 i= -i;
3297 i<<= QEXPSHIFT;
3298 i= (i + bias) / qmul; //FIXME optimize
3299 src[x + y*stride]= -i;
3300 }
3301 }else
3302 src[x + y*stride]= 0;
791e7b83
MN
3303 }
3304 }
3305 }
da66b631
MN
3306 if(level+1 == s->spatial_decomposition_count){
3307// STOP_TIMER("quantize")
3308 }
791e7b83
MN
3309}
3310
66b32bf2 3311static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
a0d1931c 3312 const int w= b->width;
c97de57c
MN
3313 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3314 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
a0d1931c
Y
3315 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3316 int x,y;
3317 START_TIMER
115329f1 3318
a0d1931c 3319 if(s->qlog == LOSSLESS_QLOG) return;
115329f1 3320
66b32bf2 3321 for(y=start_y; y<end_y; y++){
a0d1931c
Y
3322// DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3323 DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3324 for(x=0; x<w; x++){
3325 int i= line[x];
3326 if(i<0){
3327 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3328 }else if(i>0){
3329 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
3330 }
3331 }
3332 }
3333 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3334 STOP_TIMER("dquant")
3335 }
3336}
3337
791e7b83 3338static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
791e7b83
MN
3339 const int w= b->width;
3340 const int h= b->height;
c97de57c
MN
3341 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3342 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
791e7b83
MN
3343 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3344 int x,y;
ea7d9cd4 3345 START_TIMER
115329f1 3346
93fbdb5a 3347 if(s->qlog == LOSSLESS_QLOG) return;
115329f1 3348
791e7b83
MN
3349 for(y=0; y<h; y++){
3350 for(x=0; x<w; x++){
3351 int i= src[x + y*stride];
3352 if(i<0){
3353 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3354 }else if(i>0){
3355 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
3356 }
3357 }
3358 }
ea7d9cd4
MN
3359 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3360 STOP_TIMER("dquant")
3361 }
791e7b83
MN
3362}
3363
3364static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3365 const int w= b->width;
3366 const int h= b->height;
3367 int x,y;
115329f1 3368
791e7b83
MN
3369 for(y=h-1; y>=0; y--){
3370 for(x=w-1; x>=0; x--){
3371 int i= x + y*stride;
115329f1 3372
791e7b83
MN
3373 if(x){
3374 if(use_median){
3375 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3376 else src[i] -= src[i - 1];
3377 }else{
3378 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3379 else src[i] -= src[i - 1];
3380 }
3381 }else{
3382 if(y) src[i] -= src[i - stride];
3383 }
3384 }
3385 }
3386}
3387
66b32bf2 3388static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
a0d1931c 3389 const int w= b->width;
a0d1931c 3390 int x,y;
115329f1 3391
a0d1931c 3392// START_TIMER
115329f1 3393
a0d1931c
Y
3394 DWTELEM * line;
3395 DWTELEM * prev;
115329f1 3396
66b32bf2
Y
3397 if (start_y != 0)
3398 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
115329f1 3399
66b32bf2 3400 for(y=start_y; y<end_y; y++){
a0d1931c
Y
3401 prev = line;
3402// line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3403 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3404 for(x=0; x<w; x++){
3405 if(x){
3406 if(use_median){
3407 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3408 else line[x] += line[x - 1];
3409 }else{
3410 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3411 else line[x] += line[x - 1];
3412 }
3413 }else{
3414 if(y) line[x] += prev[x];
3415 }
3416 }
3417 }
115329f1 3418
a0d1931c
Y
3419// STOP_TIMER("correlate")
3420}
3421
791e7b83
MN
3422static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3423 const int w= b->width;
3424 const int h= b->height;
3425 int x,y;
115329f1 3426
791e7b83
MN
3427 for(y=0; y<h; y++){
3428 for(x=0; x<w; x++){
3429 int i= x + y*stride;
115329f1 3430
791e7b83
MN
3431 if(x){
3432 if(use_median){
3433 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3434 else src[i] += src[i - 1];
3435 }else{
3436 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3437 else src[i] += src[i - 1];
3438 }
3439 }else{
3440 if(y) src[i] += src[i - stride];
3441 }
3442 }
3443 }
3444}
3445
3446static void encode_header(SnowContext *s){
3447 int plane_index, level, orientation;
115329f1
DB
3448 uint8_t kstate[32];
3449
3450 memset(kstate, MID_STATE, sizeof(kstate));
791e7b83 3451
28869757 3452 put_rac(&s->c, kstate, s->keyframe);
19aa028d
MN
3453 if(s->keyframe || s->always_reset)
3454 reset_contexts(s);
791e7b83
MN
3455 if(s->keyframe){
3456 put_symbol(&s->c, s->header_state, s->version, 0);
28869757 3457 put_rac(&s->c, s->header_state, s->always_reset);
791e7b83
MN
3458 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3459 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3460 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3461 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
791e7b83
MN
3462 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3463 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
28869757
MN
3464 put_rac(&s->c, s->header_state, s->spatial_scalability);
3465// put_rac(&s->c, s->header_state, s->rate_scalability);
791e7b83
MN
3466
3467 for(plane_index=0; plane_index<2; plane_index++){
3468 for(level=0; level<s->spatial_decomposition_count; level++){
3469 for(orientation=level ? 1:0; orientation<4; orientation++){
3470 if(orientation==2) continue;
3471 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3472 }
3473 }
3474 }
3475 }
3476 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
115329f1
DB
3477 put_symbol(&s->c, s->header_state, s->qlog, 1);
3478 put_symbol(&s->c, s->header_state, s->mv_scale, 0);
791e7b83 3479 put_symbol(&s->c, s->header_state, s->qbias, 1);
155ec6ed 3480 put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
791e7b83
MN
3481}
3482
3483static int decode_header(SnowContext *s){
3484 int plane_index, level, orientation;
28869757
MN
3485 uint8_t kstate[32];
3486
115329f1 3487 memset(kstate, MID_STATE, sizeof(kstate));
791e7b83 3488
28869757 3489 s->keyframe= get_rac(&s->c, kstate);
19aa028d
MN
3490 if(s->keyframe || s->always_reset)
3491 reset_contexts(s);
791e7b83
MN
3492 if(s->keyframe){
3493 s->version= get_symbol(&s->c, s->header_state, 0);
3494 if(s->version>0){
3495 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3496 return -1;
3497 }
28869757 3498 s->always_reset= get_rac(&s->c, s->header_state);
791e7b83
MN
3499 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3500 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3501 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3502 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
791e7b83
MN
3503 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3504 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
28869757
MN
3505 s->spatial_scalability= get_rac(&s->c, s->header_state);
3506// s->rate_scalability= get_rac(&s->c, s->header_state);
791e7b83
MN
3507
3508 for(plane_index=0; plane_index<3; plane_index++){
3509 for(level=0; level<s->spatial_decomposition_count; level++){
3510 for(orientation=level ? 1:0; orientation<4; orientation++){
3511 int q;
3512 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3513 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3514 else q= get_symbol(&s->c, s->header_state, 1);
3515 s->plane[plane_index].band[level][orientation].qlog= q;