Split global .gitignore file into per-directory files
[libav.git] / libavcodec / vc1_loopfilter.c
CommitLineData
04d14c9b
LB
1/*
2 * VC-1 and WMV3 decoder
3 * Copyright (c) 2011 Mashiat Sarker Shakkhar
4 * Copyright (c) 2006-2007 Konstantin Shishkov
5 * Partly based on vc9.c (c) 2005 Anonymous, Alex Beregszaszi, Michael Niedermayer
6 *
7 * This file is part of Libav.
8 *
9 * Libav is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * Libav is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with Libav; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24/**
25 * @file
26 * VC-1 and WMV3 loopfilter
27 */
28
29#include "avcodec.h"
30#include "mpegvideo.h"
31#include "vc1.h"
32#include "vc1dsp.h"
33
34void ff_vc1_loop_filter_iblk(VC1Context *v, int pq)
35{
36 MpegEncContext *s = &v->s;
37 int j;
38 if (!s->first_slice_line) {
39 v->vc1dsp.vc1_v_loop_filter16(s->dest[0], s->linesize, pq);
40 if (s->mb_x)
41 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq);
42 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize + 8, s->linesize, pq);
43 for (j = 0; j < 2; j++) {
44 v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1], s->uvlinesize, pq);
45 if (s->mb_x)
46 v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq);
47 }
48 }
49 v->vc1dsp.vc1_v_loop_filter16(s->dest[0] + 8 * s->linesize, s->linesize, pq);
50
51 if (s->mb_y == s->end_mb_y - 1) {
52 if (s->mb_x) {
53 v->vc1dsp.vc1_h_loop_filter16(s->dest[0], s->linesize, pq);
54 v->vc1dsp.vc1_h_loop_filter8(s->dest[1], s->uvlinesize, pq);
55 v->vc1dsp.vc1_h_loop_filter8(s->dest[2], s->uvlinesize, pq);
56 }
57 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] + 8, s->linesize, pq);
58 }
59}
60
61void ff_vc1_loop_filter_iblk_delayed(VC1Context *v, int pq)
62{
63 MpegEncContext *s = &v->s;
64 int j;
65
66 /* The loopfilter runs 1 row and 1 column behind the overlap filter, which
67 * means it runs two rows/cols behind the decoding loop. */
68 if (!s->first_slice_line) {
69 if (s->mb_x) {
70 if (s->mb_y >= s->start_mb_y + 2) {
71 v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 16 * s->linesize - 16, s->linesize, pq);
72
73 if (s->mb_x >= 2)
74 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize - 16, s->linesize, pq);
75 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize - 8, s->linesize, pq);
76 for (j = 0; j < 2; j++) {
77 v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize - 8, s->uvlinesize, pq);
78 if (s->mb_x >= 2) {
79 v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 16 * s->uvlinesize - 8, s->uvlinesize, pq);
80 }
81 }
82 }
83 v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 8 * s->linesize - 16, s->linesize, pq);
84 }
85
86 if (s->mb_x == s->mb_width - 1) {
87 if (s->mb_y >= s->start_mb_y + 2) {
88 v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq);
89
90 if (s->mb_x)
91 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize, s->linesize, pq);
92 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize + 8, s->linesize, pq);
93 for (j = 0; j < 2; j++) {
94 v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq);
95 if (s->mb_x >= 2) {
96 v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 16 * s->uvlinesize, s->uvlinesize, pq);
97 }
98 }
99 }
100 v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 8 * s->linesize, s->linesize, pq);
101 }
102
103 if (s->mb_y == s->end_mb_y) {
104 if (s->mb_x) {
105 if (s->mb_x >= 2)
106 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize - 16, s->linesize, pq);
107 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize - 8, s->linesize, pq);
108 if (s->mb_x >= 2) {
109 for (j = 0; j < 2; j++) {
110 v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize - 8, s->uvlinesize, pq);
111 }
112 }
113 }
114
115 if (s->mb_x == s->mb_width - 1) {
116 if (s->mb_x)
117 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq);
118 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize + 8, s->linesize, pq);
119 if (s->mb_x) {
120 for (j = 0; j < 2; j++) {
121 v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq);
122 }
123 }
124 }
125 }
126 }
127}
128
129void ff_vc1_smooth_overlap_filter_iblk(VC1Context *v)
130{
131 MpegEncContext *s = &v->s;
132 int mb_pos;
133
134 if (v->condover == CONDOVER_NONE)
135 return;
136
137 mb_pos = s->mb_x + s->mb_y * s->mb_stride;
138
139 /* Within a MB, the horizontal overlap always runs before the vertical.
140 * To accomplish that, we run the H on left and internal borders of the
141 * currently decoded MB. Then, we wait for the next overlap iteration
142 * to do H overlap on the right edge of this MB, before moving over and
143 * running the V overlap. Therefore, the V overlap makes us trail by one
144 * MB col and the H overlap filter makes us trail by one MB row. This
145 * is reflected in the time at which we run the put_pixels loop. */
146 if (v->condover == CONDOVER_ALL || v->pq >= 9 || v->over_flags_plane[mb_pos]) {
147 if (s->mb_x && (v->condover == CONDOVER_ALL || v->pq >= 9 ||
148 v->over_flags_plane[mb_pos - 1])) {
149 v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][1],
150 v->block[v->cur_blk_idx][0]);
151 v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][3],
152 v->block[v->cur_blk_idx][2]);
7c6eb0a1 153 if (!(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
04d14c9b
LB
154 v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][4],
155 v->block[v->cur_blk_idx][4]);
156 v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][5],
157 v->block[v->cur_blk_idx][5]);
158 }
159 }
160 v->vc1dsp.vc1_h_s_overlap(v->block[v->cur_blk_idx][0],
161 v->block[v->cur_blk_idx][1]);
162 v->vc1dsp.vc1_h_s_overlap(v->block[v->cur_blk_idx][2],
163 v->block[v->cur_blk_idx][3]);
164
165 if (s->mb_x == s->mb_width - 1) {
166 if (!s->first_slice_line && (v->condover == CONDOVER_ALL || v->pq >= 9 ||
167 v->over_flags_plane[mb_pos - s->mb_stride])) {
168 v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][2],
169 v->block[v->cur_blk_idx][0]);
170 v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][3],
171 v->block[v->cur_blk_idx][1]);
7c6eb0a1 172 if (!(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
04d14c9b
LB
173 v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][4],
174 v->block[v->cur_blk_idx][4]);
175 v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][5],
176 v->block[v->cur_blk_idx][5]);
177 }
178 }
179 v->vc1dsp.vc1_v_s_overlap(v->block[v->cur_blk_idx][0],
180 v->block[v->cur_blk_idx][2]);
181 v->vc1dsp.vc1_v_s_overlap(v->block[v->cur_blk_idx][1],
182 v->block[v->cur_blk_idx][3]);
183 }
184 }
185 if (s->mb_x && (v->condover == CONDOVER_ALL || v->over_flags_plane[mb_pos - 1])) {
186 if (!s->first_slice_line && (v->condover == CONDOVER_ALL || v->pq >= 9 ||
187 v->over_flags_plane[mb_pos - s->mb_stride - 1])) {
188 v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][2],
189 v->block[v->left_blk_idx][0]);
190 v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][3],
191 v->block[v->left_blk_idx][1]);
7c6eb0a1 192 if (!(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
04d14c9b
LB
193 v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][4],
194 v->block[v->left_blk_idx][4]);
195 v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][5],
196 v->block[v->left_blk_idx][5]);
197 }
198 }
199 v->vc1dsp.vc1_v_s_overlap(v->block[v->left_blk_idx][0],
200 v->block[v->left_blk_idx][2]);
201 v->vc1dsp.vc1_v_s_overlap(v->block[v->left_blk_idx][1],
202 v->block[v->left_blk_idx][3]);
203 }
204}
205
206static av_always_inline void vc1_apply_p_v_loop_filter(VC1Context *v, int block_num)
207{
208 MpegEncContext *s = &v->s;
209 int mb_cbp = v->cbp[s->mb_x - s->mb_stride],
210 block_cbp = mb_cbp >> (block_num * 4), bottom_cbp,
211 mb_is_intra = v->is_intra[s->mb_x - s->mb_stride],
212 block_is_intra = mb_is_intra >> (block_num * 4), bottom_is_intra;
213 int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize, ttblk;
214 uint8_t *dst;
215
216 if (block_num > 3) {
217 dst = s->dest[block_num - 3];
218 } else {
219 dst = s->dest[0] + (block_num & 1) * 8 + ((block_num & 2) * 4 - 8) * linesize;
220 }
221 if (s->mb_y != s->end_mb_y || block_num < 2) {
222 int16_t (*mv)[2];
223 int mv_stride;
224
225 if (block_num > 3) {
226 bottom_cbp = v->cbp[s->mb_x] >> (block_num * 4);
51946d2d 227 bottom_is_intra = v->is_intra[s->mb_x] >> block_num;
04d14c9b
LB
228 mv = &v->luma_mv[s->mb_x - s->mb_stride];
229 mv_stride = s->mb_stride;
230 } else {
231 bottom_cbp = (block_num < 2) ? (mb_cbp >> ((block_num + 2) * 4))
232 : (v->cbp[s->mb_x] >> ((block_num - 2) * 4));
51946d2d
MN
233 bottom_is_intra = (block_num < 2) ? (mb_is_intra >> (block_num + 2))
234 : (v->is_intra[s->mb_x] >> (block_num - 2));
04d14c9b
LB
235 mv_stride = s->b8_stride;
236 mv = &s->current_picture.motion_val[0][s->block_index[block_num] - 2 * mv_stride];
237 }
238
239 if (bottom_is_intra & 1 || block_is_intra & 1 ||
240 mv[0][0] != mv[mv_stride][0] || mv[0][1] != mv[mv_stride][1]) {
241 v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq);
242 } else {
243 idx = ((bottom_cbp >> 2) | block_cbp) & 3;
244 if (idx == 3) {
245 v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq);
246 } else if (idx) {
247 if (idx == 1)
248 v->vc1dsp.vc1_v_loop_filter4(dst + 4, linesize, v->pq);
249 else
250 v->vc1dsp.vc1_v_loop_filter4(dst, linesize, v->pq);
251 }
252 }
253 }
254
255 dst -= 4 * linesize;
256 ttblk = (v->ttblk[s->mb_x - s->mb_stride] >> (block_num * 4)) & 0xF;
257 if (ttblk == TT_4X4 || ttblk == TT_8X4) {
258 idx = (block_cbp | (block_cbp >> 2)) & 3;
259 if (idx == 3) {
260 v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq);
261 } else if (idx) {
262 if (idx == 1)
263 v->vc1dsp.vc1_v_loop_filter4(dst + 4, linesize, v->pq);
264 else
265 v->vc1dsp.vc1_v_loop_filter4(dst, linesize, v->pq);
266 }
267 }
268}
269
270static av_always_inline void vc1_apply_p_h_loop_filter(VC1Context *v, int block_num)
271{
272 MpegEncContext *s = &v->s;
273 int mb_cbp = v->cbp[s->mb_x - 1 - s->mb_stride],
274 block_cbp = mb_cbp >> (block_num * 4), right_cbp,
275 mb_is_intra = v->is_intra[s->mb_x - 1 - s->mb_stride],
51946d2d 276 block_is_intra = mb_is_intra >> block_num, right_is_intra;
04d14c9b
LB
277 int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize, ttblk;
278 uint8_t *dst;
279
280 if (block_num > 3) {
281 dst = s->dest[block_num - 3] - 8 * linesize;
282 } else {
283 dst = s->dest[0] + (block_num & 1) * 8 + ((block_num & 2) * 4 - 16) * linesize - 8;
284 }
285
286 if (s->mb_x != s->mb_width || !(block_num & 5)) {
287 int16_t (*mv)[2];
288
289 if (block_num > 3) {
290 right_cbp = v->cbp[s->mb_x - s->mb_stride] >> (block_num * 4);
51946d2d 291 right_is_intra = v->is_intra[s->mb_x - s->mb_stride] >> block_num;
04d14c9b
LB
292 mv = &v->luma_mv[s->mb_x - s->mb_stride - 1];
293 } else {
294 right_cbp = (block_num & 1) ? (v->cbp[s->mb_x - s->mb_stride] >> ((block_num - 1) * 4))
295 : (mb_cbp >> ((block_num + 1) * 4));
51946d2d
MN
296 right_is_intra = (block_num & 1) ? (v->is_intra[s->mb_x - s->mb_stride] >> (block_num - 1))
297 : (mb_is_intra >> (block_num + 1));
04d14c9b
LB
298 mv = &s->current_picture.motion_val[0][s->block_index[block_num] - s->b8_stride * 2 - 2];
299 }
300 if (block_is_intra & 1 || right_is_intra & 1 || mv[0][0] != mv[1][0] || mv[0][1] != mv[1][1]) {
301 v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq);
302 } else {
303 idx = ((right_cbp >> 1) | block_cbp) & 5; // FIXME check
304 if (idx == 5) {
305 v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq);
306 } else if (idx) {
307 if (idx == 1)
308 v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize, linesize, v->pq);
309 else
310 v->vc1dsp.vc1_h_loop_filter4(dst, linesize, v->pq);
311 }
312 }
313 }
314
315 dst -= 4;
316 ttblk = (v->ttblk[s->mb_x - s->mb_stride - 1] >> (block_num * 4)) & 0xf;
317 if (ttblk == TT_4X4 || ttblk == TT_4X8) {
318 idx = (block_cbp | (block_cbp >> 1)) & 5;
319 if (idx == 5) {
320 v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq);
321 } else if (idx) {
322 if (idx == 1)
323 v->vc1dsp.vc1_h_loop_filter4(dst + linesize * 4, linesize, v->pq);
324 else
325 v->vc1dsp.vc1_h_loop_filter4(dst, linesize, v->pq);
326 }
327 }
328}
329
330void ff_vc1_apply_p_loop_filter(VC1Context *v)
331{
332 MpegEncContext *s = &v->s;
333 int i;
334
335 for (i = 0; i < 6; i++) {
336 vc1_apply_p_v_loop_filter(v, i);
337 }
338
339 /* V always precedes H, therefore we run H one MB before V;
340 * at the end of a row, we catch up to complete the row */
341 if (s->mb_x) {
342 for (i = 0; i < 6; i++) {
343 vc1_apply_p_h_loop_filter(v, i);
344 }
345 if (s->mb_x == s->mb_width - 1) {
346 s->mb_x++;
347 ff_update_block_index(s);
348 for (i = 0; i < 6; i++) {
349 vc1_apply_p_h_loop_filter(v, i);
350 }
351 }
352 }
353}