H.264: optimize intra/inter loopfilter decision
[libav.git] / libavcodec / h264_loopfilter.c
CommitLineData
082cf971
MN
1/*
2 * H.26L/H.264/AVC/JVT/14496-10/... loop filter
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4 *
2912e87a 5 * This file is part of Libav.
082cf971 6 *
2912e87a 7 * Libav is free software; you can redistribute it and/or
082cf971
MN
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
2912e87a 12 * Libav is distributed in the hope that it will be useful,
082cf971
MN
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
2912e87a 18 * License along with Libav; if not, write to the Free Software
082cf971
MN
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22/**
ba87f080 23 * @file
082cf971
MN
24 * H.264 / AVC / MPEG4 part10 loop filter.
25 * @author Michael Niedermayer <michaelni@gmx.at>
26 */
27
19769ece 28#include "libavutil/intreadwrite.h"
082cf971
MN
29#include "internal.h"
30#include "dsputil.h"
31#include "avcodec.h"
32#include "mpegvideo.h"
33#include "h264.h"
34#include "mathops.h"
35#include "rectangle.h"
36
082cf971
MN
37//#undef NDEBUG
38#include <assert.h>
39
40/* Deblocking filter (p153) */
/**
 * Alpha thresholds for the deblocking filter.
 *
 * The table holds 3*52 entries: 52 leading zeros, the 52 nominal values,
 * and 52 trailing copies of the largest value (255). The filter helpers in
 * this file index it with "qp - qp_bd_offset + h->slice_alpha_c0_offset".
 * NOTE(review): presumably the slice offset is stored biased by 52 so that
 * the padding replaces an explicit clamp -- confirm where it is set.
 */
static const uint8_t alpha_table[52*3] = {
    /* low padding (52 entries) */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* nominal entries 0..15 */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* nominal entries 16..31 */
      4,  4,  5,  6,  7,  8,  9, 10, 12, 13, 15, 17, 20, 22, 25, 28,
    /* nominal entries 32..47 */
     32, 36, 40, 45, 50, 56, 63, 71, 80, 90,101,113,127,144,162,182,
    /* nominal entries 48..51 */
    203,226,255,255,
    /* high padding (52 entries) */
    255,255,255,255,255,255,255,255,255,255,255,255,255,
    255,255,255,255,255,255,255,255,255,255,255,255,255,
    255,255,255,255,255,255,255,255,255,255,255,255,255,
    255,255,255,255,255,255,255,255,255,255,255,255,255,
};
/**
 * Beta thresholds for the deblocking filter.
 *
 * Same layout as alpha_table: 52 leading zeros, the 52 nominal values and
 * 52 trailing copies of the largest value (18). The filter helpers in this
 * file index it with "qp - qp_bd_offset + h->slice_beta_offset".
 * NOTE(review): presumably the slice offset is stored biased by 52 so that
 * the padding replaces an explicit clamp -- confirm where it is set.
 */
static const uint8_t beta_table[52*3] = {
    /* low padding (52 entries) */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* nominal entries 0..15 */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* nominal entries 16..31 */
     2,  2,  2,  3,  3,  3,  3,  4,  4,  4,  6,  6,  7,  7,  8,  8,
    /* nominal entries 32..47 */
     9,  9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
    /* nominal entries 48..51 */
    17, 17, 18, 18,
    /* high padding (52 entries) */
    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
};
/**
 * tc0 clipping values for the deblocking filter, indexed as
 * tc0_table[index_a][bS].
 *
 * Column 0 (bS == 0) is -1 in every row; columns 1..3 hold the values for
 * bS 1..3. The luma helpers copy an entry straight into an int8_t tc[]
 * array, the chroma helpers copy entry + 1.
 *
 * The element type is int8_t so that the -1 entries are stored as -1.
 * With an unsigned element type they would read back as 255 and only turn
 * into -1 (luma) or 0 (chroma, after the +1) through an out-of-range
 * narrowing conversion in the callers.
 *
 * Layout mirrors alpha_table: 52 low padding rows, 52 nominal rows and
 * 52 high padding rows that repeat the last nominal row.
 */
static const int8_t tc0_table[52*3][4] = {
    /* low padding (52 rows) */
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    /* nominal rows 0..15 */
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    /* nominal rows 16..27 */
    {-1, 0, 0, 0 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 },
    {-1, 0, 0, 1 }, {-1, 0, 1, 1 }, {-1, 0, 1, 1 }, {-1, 1, 1, 1 },
    {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 2 },
    /* nominal rows 28..39 */
    {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 2, 3 },
    {-1, 1, 2, 3 }, {-1, 2, 2, 3 }, {-1, 2, 2, 4 }, {-1, 2, 3, 4 },
    {-1, 2, 3, 4 }, {-1, 3, 3, 5 }, {-1, 3, 4, 6 }, {-1, 3, 4, 6 },
    /* nominal rows 40..51 */
    {-1, 4, 5, 7 }, {-1, 4, 5, 8 }, {-1, 4, 6, 9 }, {-1, 5, 7,10 },
    {-1, 6, 8,11 }, {-1, 6, 8,13 }, {-1, 7,10,14 }, {-1, 8,11,16 },
    {-1, 9,12,18 }, {-1,10,13,20 }, {-1,11,15,23 }, {-1,13,17,25 },
    /* high padding (52 rows) */
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
};
102
a4f6be86
JGG
103/* intra: 0 if this loopfilter call is guaranteed to be inter (bS < 4), 1 if it might be intra (bS == 4) */
104static void av_always_inline filter_mb_edgev( uint8_t *pix, int stride, const int16_t bS[4], unsigned int qp, H264Context *h, int intra ) {
44ca80df
OA
105 const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
106 const unsigned int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
258b60c2 107 const int alpha = alpha_table[index_a];
44ca80df 108 const int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
082cf971
MN
109 if (alpha ==0 || beta == 0) return;
110
a4f6be86 111 if( bS[0] < 4 || !intra ) {
082cf971 112 int8_t tc[4];
258b60c2
MN
113 tc[0] = tc0_table[index_a][bS[0]];
114 tc[1] = tc0_table[index_a][bS[1]];
115 tc[2] = tc0_table[index_a][bS[2]];
116 tc[3] = tc0_table[index_a][bS[3]];
4693b031 117 h->h264dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
082cf971 118 } else {
4693b031 119 h->h264dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
082cf971
MN
120 }
121}
a4f6be86 122static void av_always_inline filter_mb_edgecv( uint8_t *pix, int stride, const int16_t bS[4], unsigned int qp, H264Context *h, int intra ) {
44ca80df
OA
123 const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
124 const unsigned int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
258b60c2 125 const int alpha = alpha_table[index_a];
44ca80df 126 const int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
082cf971
MN
127 if (alpha ==0 || beta == 0) return;
128
a4f6be86 129 if( bS[0] < 4 || !intra ) {
082cf971 130 int8_t tc[4];
258b60c2
MN
131 tc[0] = tc0_table[index_a][bS[0]]+1;
132 tc[1] = tc0_table[index_a][bS[1]]+1;
133 tc[2] = tc0_table[index_a][bS[2]]+1;
134 tc[3] = tc0_table[index_a][bS[3]]+1;
4693b031 135 h->h264dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
082cf971 136 } else {
4693b031 137 h->h264dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
082cf971
MN
138 }
139}
140
a4f6be86 141static void av_always_inline filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, const int16_t bS[7], int bsi, int qp, int intra ) {
44ca80df
OA
142 const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
143 int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
0c32e19d 144 int alpha = alpha_table[index_a];
44ca80df 145 int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
dd561441 146 if (alpha ==0 || beta == 0) return;
082cf971 147
a4f6be86 148 if( bS[0] < 4 || !intra ) {
dd561441
RB
149 int8_t tc[4];
150 tc[0] = tc0_table[index_a][bS[0*bsi]];
151 tc[1] = tc0_table[index_a][bS[1*bsi]];
152 tc[2] = tc0_table[index_a][bS[2*bsi]];
153 tc[3] = tc0_table[index_a][bS[3*bsi]];
154 h->h264dsp.h264_h_loop_filter_luma_mbaff(pix, stride, alpha, beta, tc);
155 } else {
156 h->h264dsp.h264_h_loop_filter_luma_mbaff_intra(pix, stride, alpha, beta);
082cf971
MN
157 }
158}
a4f6be86 159static void av_always_inline filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, const int16_t bS[7], int bsi, int qp, int intra ) {
44ca80df
OA
160 const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
161 int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
0c32e19d 162 int alpha = alpha_table[index_a];
44ca80df 163 int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
dd561441 164 if (alpha ==0 || beta == 0) return;
082cf971 165
a4f6be86 166 if( bS[0] < 4 || !intra ) {
dd561441
RB
167 int8_t tc[4];
168 tc[0] = tc0_table[index_a][bS[0*bsi]] + 1;
169 tc[1] = tc0_table[index_a][bS[1*bsi]] + 1;
170 tc[2] = tc0_table[index_a][bS[2*bsi]] + 1;
171 tc[3] = tc0_table[index_a][bS[3*bsi]] + 1;
172 h->h264dsp.h264_h_loop_filter_chroma_mbaff(pix, stride, alpha, beta, tc);
173 } else {
174 h->h264dsp.h264_h_loop_filter_chroma_mbaff_intra(pix, stride, alpha, beta);
082cf971
MN
175 }
176}
177
a4f6be86 178static void av_always_inline filter_mb_edgeh( uint8_t *pix, int stride, const int16_t bS[4], unsigned int qp, H264Context *h, int intra ) {
44ca80df
OA
179 const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
180 const unsigned int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
258b60c2 181 const int alpha = alpha_table[index_a];
44ca80df 182 const int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
082cf971
MN
183 if (alpha ==0 || beta == 0) return;
184
a4f6be86 185 if( bS[0] < 4 || !intra ) {
082cf971 186 int8_t tc[4];
258b60c2
MN
187 tc[0] = tc0_table[index_a][bS[0]];
188 tc[1] = tc0_table[index_a][bS[1]];
189 tc[2] = tc0_table[index_a][bS[2]];
190 tc[3] = tc0_table[index_a][bS[3]];
4693b031 191 h->h264dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
082cf971 192 } else {
4693b031 193 h->h264dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
082cf971
MN
194 }
195}
196
a4f6be86 197static void av_always_inline filter_mb_edgech( uint8_t *pix, int stride, const int16_t bS[4], unsigned int qp, H264Context *h, int intra ) {
44ca80df
OA
198 const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
199 const unsigned int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
258b60c2 200 const int alpha = alpha_table[index_a];
44ca80df 201 const int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
082cf971
MN
202 if (alpha ==0 || beta == 0) return;
203
a4f6be86 204 if( bS[0] < 4 || !intra ) {
082cf971 205 int8_t tc[4];
258b60c2
MN
206 tc[0] = tc0_table[index_a][bS[0]]+1;
207 tc[1] = tc0_table[index_a][bS[1]]+1;
208 tc[2] = tc0_table[index_a][bS[2]]+1;
209 tc[3] = tc0_table[index_a][bS[3]]+1;
4693b031 210 h->h264dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
082cf971 211 } else {
4693b031 212 h->h264dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
082cf971
MN
213 }
214}
215
216void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
217 MpegEncContext * const s = &h->s;
d5c30c86 218 int mb_xy;
4320a309 219 int mb_type, left_type, top_type;
6a2176aa 220 int qp, qp0, qp1, qpc, qpc0, qpc1;
7b442ad9 221 int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
84153d18 222 int chroma444 = CHROMA444;
082cf971
MN
223
224 mb_xy = h->mb_xy;
225
4320a309 226 if(!h->h264dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff) {
082cf971
MN
227 ff_h264_filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
228 return;
229 }
230 assert(!FRAME_MBAFF);
556f8a06 231 left_type= h->left_type[LTOP];
4320a309 232 top_type= h->top_type;
082cf971 233
657ccb5a
DB
234 mb_type = s->current_picture.f.mb_type[mb_xy];
235 qp = s->current_picture.f.qscale_table[mb_xy];
236 qp0 = s->current_picture.f.qscale_table[mb_xy - 1];
237 qp1 = s->current_picture.f.qscale_table[h->top_mb_xy];
082cf971
MN
238 qpc = get_chroma_qp( h, 0, qp );
239 qpc0 = get_chroma_qp( h, 0, qp0 );
240 qpc1 = get_chroma_qp( h, 0, qp1 );
241 qp0 = (qp + qp0 + 1) >> 1;
242 qp1 = (qp + qp1 + 1) >> 1;
243 qpc0 = (qpc + qpc0 + 1) >> 1;
244 qpc1 = (qpc + qpc1 + 1) >> 1;
082cf971
MN
245
246 if( IS_INTRA(mb_type) ) {
a625e132
JGG
247 static const int16_t bS4[4] = {4,4,4,4};
248 static const int16_t bS3[4] = {3,3,3,3};
249 const int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
34032e26 250 if(left_type)
a4f6be86 251 filter_mb_edgev( &img_y[4*0], linesize, bS4, qp0, h, 1);
082cf971 252 if( IS_8x8DCT(mb_type) ) {
a4f6be86 253 filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h, 0);
4320a309 254 if(top_type){
a4f6be86 255 filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h, 1);
4320a309 256 }
a4f6be86 257 filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h, 0);
082cf971 258 } else {
a4f6be86
JGG
259 filter_mb_edgev( &img_y[4*1], linesize, bS3, qp, h, 0);
260 filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h, 0);
261 filter_mb_edgev( &img_y[4*3], linesize, bS3, qp, h, 0);
4320a309 262 if(top_type){
a4f6be86 263 filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h, 1);
4320a309 264 }
a4f6be86
JGG
265 filter_mb_edgeh( &img_y[4*1*linesize], linesize, bS3, qp, h, 0);
266 filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h, 0);
267 filter_mb_edgeh( &img_y[4*3*linesize], linesize, bS3, qp, h, 0);
082cf971 268 }
7b442ad9 269 if(chroma){
84153d18
JGG
270 if(chroma444){
271 if(left_type){
a4f6be86
JGG
272 filter_mb_edgev( &img_cb[4*0], linesize, bS4, qpc0, h, 1);
273 filter_mb_edgev( &img_cr[4*0], linesize, bS4, qpc0, h, 1);
84153d18
JGG
274 }
275 if( IS_8x8DCT(mb_type) ) {
a4f6be86
JGG
276 filter_mb_edgev( &img_cb[4*2], linesize, bS3, qpc, h, 0);
277 filter_mb_edgev( &img_cr[4*2], linesize, bS3, qpc, h, 0);
4320a309 278 if(top_type){
a4f6be86
JGG
279 filter_mb_edgeh( &img_cb[4*0*linesize], linesize, bSH, qpc1, h,1 );
280 filter_mb_edgeh( &img_cr[4*0*linesize], linesize, bSH, qpc1, h,1 );
4320a309 281 }
a4f6be86
JGG
282 filter_mb_edgeh( &img_cb[4*2*linesize], linesize, bS3, qpc, h, 0);
283 filter_mb_edgeh( &img_cr[4*2*linesize], linesize, bS3, qpc, h, 0);
84153d18 284 } else {
a4f6be86
JGG
285 filter_mb_edgev( &img_cb[4*1], linesize, bS3, qpc, h, 0);
286 filter_mb_edgev( &img_cr[4*1], linesize, bS3, qpc, h, 0);
287 filter_mb_edgev( &img_cb[4*2], linesize, bS3, qpc, h, 0);
288 filter_mb_edgev( &img_cr[4*2], linesize, bS3, qpc, h, 0);
289 filter_mb_edgev( &img_cb[4*3], linesize, bS3, qpc, h, 0);
290 filter_mb_edgev( &img_cr[4*3], linesize, bS3, qpc, h, 0);
4320a309 291 if(top_type){
a4f6be86
JGG
292 filter_mb_edgeh( &img_cb[4*0*linesize], linesize, bSH, qpc1, h, 1);
293 filter_mb_edgeh( &img_cr[4*0*linesize], linesize, bSH, qpc1, h, 1);
4320a309 294 }
a4f6be86
JGG
295 filter_mb_edgeh( &img_cb[4*1*linesize], linesize, bS3, qpc, h, 0);
296 filter_mb_edgeh( &img_cr[4*1*linesize], linesize, bS3, qpc, h, 0);
297 filter_mb_edgeh( &img_cb[4*2*linesize], linesize, bS3, qpc, h, 0);
298 filter_mb_edgeh( &img_cr[4*2*linesize], linesize, bS3, qpc, h, 0);
299 filter_mb_edgeh( &img_cb[4*3*linesize], linesize, bS3, qpc, h, 0);
300 filter_mb_edgeh( &img_cr[4*3*linesize], linesize, bS3, qpc, h, 0);
84153d18
JGG
301 }
302 }else{
303 if(left_type){
a4f6be86
JGG
304 filter_mb_edgecv( &img_cb[2*0], uvlinesize, bS4, qpc0, h, 1);
305 filter_mb_edgecv( &img_cr[2*0], uvlinesize, bS4, qpc0, h, 1);
84153d18 306 }
a4f6be86
JGG
307 filter_mb_edgecv( &img_cb[2*2], uvlinesize, bS3, qpc, h, 0);
308 filter_mb_edgecv( &img_cr[2*2], uvlinesize, bS3, qpc, h, 0);
4320a309 309 if(top_type){
a4f6be86
JGG
310 filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h, 1);
311 filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h, 1);
4320a309 312 }
a4f6be86
JGG
313 filter_mb_edgech( &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc, h, 0);
314 filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h, 0);
7b442ad9 315 }
0a32508d 316 }
082cf971
MN
317 return;
318 } else {
40d11227 319 LOCAL_ALIGNED_8(int16_t, bS, [2], [4][4]);
082cf971 320 int edges;
7f7dc4fb 321 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 && !chroma444 ) {
082cf971 322 edges = 4;
19769ece
MR
323 AV_WN64A(bS[0][0], 0x0002000200020002ULL);
324 AV_WN64A(bS[0][2], 0x0002000200020002ULL);
325 AV_WN64A(bS[1][0], 0x0002000200020002ULL);
326 AV_WN64A(bS[1][2], 0x0002000200020002ULL);
082cf971 327 } else {
012dbcce 328 int mask_edge1 = (3*(((5*mb_type)>>5)&1)) | (mb_type>>4); //(mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : (mb_type & MB_TYPE_16x8) ? 1 : 0;
556f8a06 329 int mask_edge0 = 3*((mask_edge1>>1) & ((5*left_type)>>5)&1); // (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) && (h->left_type[LTOP] & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : 0;
77821e11
MN
330 int step = 1+(mb_type>>24); //IS_8x8DCT(mb_type) ? 2 : 1;
331 edges = 4 - 3*((mb_type>>3) & !(h->cbp & 15)); //(mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
4693b031 332 h->h264dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
50eb40a7 333 h->list_count==2, edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
082cf971 334 }
4e992796 335 if( IS_INTRA(left_type) )
19769ece 336 AV_WN64A(bS[0][0], 0x0004000400040004ULL);
4320a309 337 if( IS_INTRA(top_type) )
19769ece 338 AV_WN64A(bS[1][0], FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL);
082cf971 339
a4f6be86 340#define FILTER(hv,dir,edge,intra)\
19769ece 341 if(AV_RN64A(bS[dir][edge])) { \
a4f6be86 342 filter_mb_edge##hv( &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir, h, intra );\
84153d18
JGG
343 if(chroma){\
344 if(chroma444){\
a4f6be86
JGG
345 filter_mb_edge##hv( &img_cb[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qpc : qpc##dir, h, intra );\
346 filter_mb_edge##hv( &img_cr[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qpc : qpc##dir, h, intra );\
84153d18 347 } else if(!(edge&1)) {\
a4f6be86
JGG
348 filter_mb_edgec##hv( &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h, intra );\
349 filter_mb_edgec##hv( &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h, intra );\
84153d18 350 }\
082cf971
MN
351 }\
352 }
34032e26 353 if(left_type)
a4f6be86 354 FILTER(v,0,0,1);
082cf971 355 if( edges == 1 ) {
4320a309 356 if(top_type)
a4f6be86 357 FILTER(h,1,0,1);
082cf971 358 } else if( IS_8x8DCT(mb_type) ) {
a4f6be86 359 FILTER(v,0,2,0);
4320a309 360 if(top_type)
a4f6be86
JGG
361 FILTER(h,1,0,1);
362 FILTER(h,1,2,0);
082cf971 363 } else {
a4f6be86
JGG
364 FILTER(v,0,1,0);
365 FILTER(v,0,2,0);
366 FILTER(v,0,3,0);
4320a309 367 if(top_type)
a4f6be86
JGG
368 FILTER(h,1,0,1);
369 FILTER(h,1,1,0);
370 FILTER(h,1,2,0);
371 FILTER(h,1,3,0);
082cf971
MN
372 }
373#undef FILTER
374 }
375}
376
e814817b 377static int check_mv(H264Context *h, long b_idx, long bn_idx, int mvy_limit){
ca7c784f 378 int v;
e814817b 379
26468148
MN
380 v= h->ref_cache[0][b_idx] != h->ref_cache[0][bn_idx];
381 if(!v && h->ref_cache[0][b_idx]!=-1)
382 v= h->mv_cache[0][b_idx][0] - h->mv_cache[0][bn_idx][0] + 3 >= 7U |
383 FFABS( h->mv_cache[0][b_idx][1] - h->mv_cache[0][bn_idx][1] ) >= mvy_limit;
fabd704b
MN
384
385 if(h->list_count==2){
386 if(!v)
5364db28
MN
387 v = h->ref_cache[1][b_idx] != h->ref_cache[1][bn_idx] |
388 h->mv_cache[1][b_idx][0] - h->mv_cache[1][bn_idx][0] + 3 >= 7U |
389 FFABS( h->mv_cache[1][b_idx][1] - h->mv_cache[1][bn_idx][1] ) >= mvy_limit;
390
391 if(v){
392 if(h->ref_cache[0][b_idx] != h->ref_cache[1][bn_idx] |
393 h->ref_cache[1][b_idx] != h->ref_cache[0][bn_idx])
394 return 1;
395 return
396 h->mv_cache[0][b_idx][0] - h->mv_cache[1][bn_idx][0] + 3 >= 7U |
397 FFABS( h->mv_cache[0][b_idx][1] - h->mv_cache[1][bn_idx][1] ) >= mvy_limit |
398 h->mv_cache[1][b_idx][0] - h->mv_cache[0][bn_idx][0] + 3 >= 7U |
399 FFABS( h->mv_cache[1][b_idx][1] - h->mv_cache[0][bn_idx][1] ) >= mvy_limit;
400 }
fabd704b 401 }
e814817b
MN
402
403 return v;
404}
082cf971 405
7b442ad9 406static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int chroma, int chroma444, int dir) {
082cf971
MN
407 MpegEncContext * const s = &h->s;
408 int edge;
c90b9442 409 int chroma_qp_avg[2];
082cf971 410 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
556f8a06 411 const int mbm_type = dir == 0 ? h->left_type[LTOP] : h->top_type;
082cf971 412
082cf971 413 // how often to recheck mv-based bS when iterating between edges
31f6e3c1
MN
414 static const uint8_t mask_edge_tab[2][8]={{0,3,3,3,1,1,1,1},
415 {0,3,1,1,3,3,3,3}};
416 const int mask_edge = mask_edge_tab[dir][(mb_type>>3)&7];
1cc2d211
MN
417 const int edges = mask_edge== 3 && !(h->cbp&15) ? 1 : 4;
418
082cf971
MN
419 // how often to recheck mv-based bS when iterating along each edge
420 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
421
3b849245 422 if(mbm_type && !first_vertical_edge_done){
082cf971 423
b3047673 424 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0)
b6302d0c
MN
425 && IS_INTERLACED(mbm_type&~mb_type)
426 ) {
427 // This is a special case in the norm where the filtering must
428 // be done twice (one each of the field) even if we are in a
429 // frame macroblock.
430 //
431 unsigned int tmp_linesize = 2 * linesize;
432 unsigned int tmp_uvlinesize = 2 * uvlinesize;
433 int mbn_xy = mb_xy - 2 * s->mb_stride;
434 int j;
435
436 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
84dc2d8a 437 DECLARE_ALIGNED(8, int16_t, bS)[4];
b6302d0c 438 int qp;
657ccb5a 439 if (IS_INTRA(mb_type | s->current_picture.f.mb_type[mbn_xy])) {
19769ece 440 AV_WN64A(bS, 0x0003000300030003ULL);
b6302d0c 441 } else {
657ccb5a 442 if (!CABAC && IS_8x8DCT(s->current_picture.f.mb_type[mbn_xy])) {
2702a6f1
JGG
443 bS[0]= 1+((h->cbp_table[mbn_xy] & 0x4000)||h->non_zero_count_cache[scan8[0]+0]);
444 bS[1]= 1+((h->cbp_table[mbn_xy] & 0x4000)||h->non_zero_count_cache[scan8[0]+1]);
445 bS[2]= 1+((h->cbp_table[mbn_xy] & 0x8000)||h->non_zero_count_cache[scan8[0]+2]);
446 bS[3]= 1+((h->cbp_table[mbn_xy] & 0x8000)||h->non_zero_count_cache[scan8[0]+3]);
9873ae0d 447 }else{
c90b9442 448 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy] + 3*4;
b6302d0c
MN
449 int i;
450 for( i = 0; i < 4; i++ ) {
451 bS[i] = 1 + !!(h->non_zero_count_cache[scan8[0]+i] | mbn_nnz[i]);
452 }
9873ae0d 453 }
082cf971 454 }
b6302d0c
MN
455 // Do not use s->qscale as luma quantizer because it has not the same
456 // value in IPCM macroblocks.
657ccb5a 457 qp = (s->current_picture.f.qscale_table[mb_xy] + s->current_picture.f.qscale_table[mbn_xy] + 1) >> 1;
b6302d0c
MN
458 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
459 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
a4f6be86 460 filter_mb_edgeh( &img_y[j*linesize], tmp_linesize, bS, qp, h, 0 );
657ccb5a
DB
461 chroma_qp_avg[0] = (h->chroma_qp[0] + get_chroma_qp(h, 0, s->current_picture.f.qscale_table[mbn_xy]) + 1) >> 1;
462 chroma_qp_avg[1] = (h->chroma_qp[1] + get_chroma_qp(h, 1, s->current_picture.f.qscale_table[mbn_xy]) + 1) >> 1;
7b442ad9
JGG
463 if (chroma) {
464 if (chroma444) {
a4f6be86
JGG
465 filter_mb_edgeh (&img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[0], h, 0);
466 filter_mb_edgeh (&img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[1], h, 0);
7b442ad9 467 } else {
a4f6be86
JGG
468 filter_mb_edgech(&img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[0], h, 0);
469 filter_mb_edgech(&img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[1], h, 0);
7b442ad9 470 }
c90b9442 471 }
082cf971 472 }
b3047673 473 }else{
84dc2d8a 474 DECLARE_ALIGNED(8, int16_t, bS)[4];
b6302d0c 475 int qp;
980bcc55 476
b6302d0c 477 if( IS_INTRA(mb_type|mbm_type)) {
19769ece 478 AV_WN64A(bS, 0x0003000300030003ULL);
b6302d0c
MN
479 if ( (!IS_INTERLACED(mb_type|mbm_type))
480 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
481 )
19769ece 482 AV_WN64A(bS, 0x0004000400040004ULL);
b6302d0c 483 } else {
78998bf2 484 int i;
b6302d0c 485 int mv_done;
980bcc55 486
37b2b0d6 487 if( dir && FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbm_type)) {
19769ece 488 AV_WN64A(bS, 0x0001000100010001ULL);
b6302d0c 489 mv_done = 1;
980bcc55 490 }
b6302d0c
MN
491 else if( mask_par0 && ((mbm_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
492 int b_idx= 8 + 4;
493 int bn_idx= b_idx - (dir ? 8:1);
980bcc55 494
e814817b 495 bS[0] = bS[1] = bS[2] = bS[3] = check_mv(h, 8 + 4, bn_idx, mvy_limit);
b6302d0c
MN
496 mv_done = 1;
497 }
498 else
499 mv_done = 0;
500
501 for( i = 0; i < 4; i++ ) {
502 int x = dir == 0 ? 0 : i;
503 int y = dir == 0 ? i : 0;
504 int b_idx= 8 + 4 + x + 8*y;
505 int bn_idx= b_idx - (dir ? 8:1);
506
507 if( h->non_zero_count_cache[b_idx] |
508 h->non_zero_count_cache[bn_idx] ) {
509 bS[i] = 2;
510 }
511 else if(!mv_done)
512 {
e814817b 513 bS[i] = check_mv(h, b_idx, bn_idx, mvy_limit);
980bcc55
MN
514 }
515 }
516 }
980bcc55 517
b6302d0c
MN
518 /* Filter edge */
519 // Do not use s->qscale as luma quantizer because it has not the same
520 // value in IPCM macroblocks.
521 if(bS[0]+bS[1]+bS[2]+bS[3]){
657ccb5a 522 qp = (s->current_picture.f.qscale_table[mb_xy] + s->current_picture.f.qscale_table[mbm_xy] + 1) >> 1;
b6302d0c
MN
523 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp[0], s->current_picture.qscale_table[mbn_xy]);
524 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
525 //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
657ccb5a
DB
526 chroma_qp_avg[0] = (h->chroma_qp[0] + get_chroma_qp(h, 0, s->current_picture.f.qscale_table[mbm_xy]) + 1) >> 1;
527 chroma_qp_avg[1] = (h->chroma_qp[1] + get_chroma_qp(h, 1, s->current_picture.f.qscale_table[mbm_xy]) + 1) >> 1;
b6302d0c 528 if( dir == 0 ) {
a4f6be86 529 filter_mb_edgev( &img_y[0], linesize, bS, qp, h, 1 );
7b442ad9 530 if (chroma) {
c90b9442 531 if (chroma444) {
a4f6be86
JGG
532 filter_mb_edgev ( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h, 1);
533 filter_mb_edgev ( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h, 1);
c90b9442 534 } else {
a4f6be86
JGG
535 filter_mb_edgecv( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h, 1);
536 filter_mb_edgecv( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h, 1);
c90b9442 537 }
b6302d0c
MN
538 }
539 } else {
a4f6be86 540 filter_mb_edgeh( &img_y[0], linesize, bS, qp, h, 1 );
7b442ad9 541 if (chroma) {
c90b9442 542 if (chroma444) {
a4f6be86
JGG
543 filter_mb_edgeh ( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h, 1);
544 filter_mb_edgeh ( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h, 1);
c90b9442 545 } else {
a4f6be86
JGG
546 filter_mb_edgech( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h, 1);
547 filter_mb_edgech( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h, 1);
c90b9442 548 }
b6302d0c
MN
549 }
550 }
980bcc55
MN
551 }
552 }
b3047673
MN
553 }
554
980bcc55
MN
555 /* Calculate bS */
556 for( edge = 1; edge < edges; edge++ ) {
84dc2d8a 557 DECLARE_ALIGNED(8, int16_t, bS)[4];
082cf971
MN
558 int qp;
559
933bea77 560 if( IS_8x8DCT(mb_type & (edge<<24)) ) // (edge&1) && IS_8x8DCT(mb_type)
082cf971
MN
561 continue;
562
980bcc55 563 if( IS_INTRA(mb_type)) {
19769ece 564 AV_WN64A(bS, 0x0003000300030003ULL);
082cf971 565 } else {
78998bf2 566 int i;
082cf971
MN
567 int mv_done;
568
569 if( edge & mask_edge ) {
19769ece 570 AV_ZERO64(bS);
082cf971
MN
571 mv_done = 1;
572 }
980bcc55 573 else if( mask_par0 ) {
082cf971
MN
574 int b_idx= 8 + 4 + edge * (dir ? 8:1);
575 int bn_idx= b_idx - (dir ? 8:1);
082cf971 576
e814817b 577 bS[0] = bS[1] = bS[2] = bS[3] = check_mv(h, b_idx, bn_idx, mvy_limit);
082cf971
MN
578 mv_done = 1;
579 }
580 else
581 mv_done = 0;
582
583 for( i = 0; i < 4; i++ ) {
584 int x = dir == 0 ? edge : i;
585 int y = dir == 0 ? i : edge;
586 int b_idx= 8 + 4 + x + 8*y;
587 int bn_idx= b_idx - (dir ? 8:1);
588
589 if( h->non_zero_count_cache[b_idx] |
590 h->non_zero_count_cache[bn_idx] ) {
591 bS[i] = 2;
592 }
593 else if(!mv_done)
594 {
e814817b 595 bS[i] = check_mv(h, b_idx, bn_idx, mvy_limit);
082cf971
MN
596 }
597 }
598
599 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
600 continue;
601 }
602
603 /* Filter edge */
604 // Do not use s->qscale as luma quantizer because it has not the same
605 // value in IPCM macroblocks.
657ccb5a 606 qp = s->current_picture.f.qscale_table[mb_xy];
c988f975 607 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp[0], s->current_picture.qscale_table[mbn_xy]);
082cf971 608 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
7931bb2a 609 //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
082cf971 610 if( dir == 0 ) {
a4f6be86 611 filter_mb_edgev( &img_y[4*edge << h->pixel_shift], linesize, bS, qp, h, 0 );
7b442ad9
JGG
612 if (chroma) {
613 if (chroma444) {
a4f6be86
JGG
614 filter_mb_edgev ( &img_cb[4*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h, 0);
615 filter_mb_edgev ( &img_cr[4*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h, 0);
7b442ad9 616 } else if( (edge&1) == 0 ) {
a4f6be86
JGG
617 filter_mb_edgecv( &img_cb[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h, 0);
618 filter_mb_edgecv( &img_cr[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h, 0);
7b442ad9 619 }
082cf971
MN
620 }
621 } else {
a4f6be86 622 filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, h, 0 );
7b442ad9
JGG
623 if (chroma) {
624 if (chroma444) {
a4f6be86
JGG
625 filter_mb_edgeh ( &img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h, 0);
626 filter_mb_edgeh ( &img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h, 0);
7b442ad9 627 } else if( (edge&1) == 0 ) {
a4f6be86
JGG
628 filter_mb_edgech( &img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h, 0);
629 filter_mb_edgech( &img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h, 0);
7b442ad9 630 }
082cf971
MN
631 }
632 }
633 }
634}
635
636void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
637 MpegEncContext * const s = &h->s;
638 const int mb_xy= mb_x + mb_y*s->mb_stride;
657ccb5a 639 const int mb_type = s->current_picture.f.mb_type[mb_xy];
082cf971
MN
640 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
641 int first_vertical_edge_done = 0;
642 av_unused int dir;
7b442ad9 643 int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
082cf971 644
082cf971 645 if (FRAME_MBAFF
082cf971 646 // and current and left pair do not have the same interlaced type
556f8a06 647 && IS_INTERLACED(mb_type^h->left_type[LTOP])
8670f84c 648 // and left mb is in available to us
556f8a06 649 && h->left_type[LTOP]) {
082cf971
MN
650 /* First vertical edge is different in MBAFF frames
651 * There are 8 different bS to compute and 2 different Qp
652 */
84dc2d8a 653 DECLARE_ALIGNED(8, int16_t, bS)[8];
082cf971
MN
654 int qp[2];
655 int bqp[2];
656 int rqp[2];
657 int mb_qp, mbn0_qp, mbn1_qp;
658 int i;
659 first_vertical_edge_done = 1;
660
19769ece
MR
661 if( IS_INTRA(mb_type) ) {
662 AV_WN64A(&bS[0], 0x0004000400040004ULL);
663 AV_WN64A(&bS[4], 0x0004000400040004ULL);
664 } else {
e470ef76
MN
665 static const uint8_t offset[2][2][8]={
666 {
c90b9442
JGG
667 {3+4*0, 3+4*0, 3+4*0, 3+4*0, 3+4*1, 3+4*1, 3+4*1, 3+4*1},
668 {3+4*2, 3+4*2, 3+4*2, 3+4*2, 3+4*3, 3+4*3, 3+4*3, 3+4*3},
e470ef76 669 },{
c90b9442
JGG
670 {3+4*0, 3+4*1, 3+4*2, 3+4*3, 3+4*0, 3+4*1, 3+4*2, 3+4*3},
671 {3+4*0, 3+4*1, 3+4*2, 3+4*3, 3+4*0, 3+4*1, 3+4*2, 3+4*3},
e470ef76
MN
672 }
673 };
674 const uint8_t *off= offset[MB_FIELD][mb_y&1];
082cf971 675 for( i = 0; i < 8; i++ ) {
16e5e39a 676 int j= MB_FIELD ? i>>2 : i&1;
556f8a06
JGG
677 int mbn_xy = h->left_mb_xy[LEFT(j)];
678 int mbn_type= h->left_type[LEFT(j)];
082cf971 679
16e5e39a 680 if( IS_INTRA( mbn_type ) )
082cf971 681 bS[i] = 4;
16e5e39a
MN
682 else{
683 bS[i] = 1 + !!(h->non_zero_count_cache[12+8*(i>>1)] |
684 ((!h->pps.cabac && IS_8x8DCT(mbn_type)) ?
2702a6f1 685 (h->cbp_table[mbn_xy] & (((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2) << 12))
082cf971 686 :
e470ef76 687 h->non_zero_count[mbn_xy][ off[i] ]));
16e5e39a 688 }
082cf971
MN
689 }
690 }
691
657ccb5a
DB
692 mb_qp = s->current_picture.f.qscale_table[mb_xy];
693 mbn0_qp = s->current_picture.f.qscale_table[h->left_mb_xy[0]];
694 mbn1_qp = s->current_picture.f.qscale_table[h->left_mb_xy[1]];
082cf971
MN
695 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
696 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
697 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
698 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
699 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
700 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
701 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
702 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
703 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
704 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
705
706 /* Filter edge */
707 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
708 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
bffe82f5 709 if(MB_FIELD){
a4f6be86
JGG
710 filter_mb_mbaff_edgev ( h, img_y , linesize, bS , 1, qp [0], 1 );
711 filter_mb_mbaff_edgev ( h, img_y + 8* linesize, linesize, bS+4, 1, qp [1], 1 );
7b442ad9 712 if (chroma){
7c9079ab 713 if (CHROMA444) {
a4f6be86
JGG
714 filter_mb_mbaff_edgev ( h, img_cb, uvlinesize, bS , 1, bqp[0], 1 );
715 filter_mb_mbaff_edgev ( h, img_cb + 8*uvlinesize, uvlinesize, bS+4, 1, bqp[1], 1 );
716 filter_mb_mbaff_edgev ( h, img_cr, uvlinesize, bS , 1, rqp[0], 1 );
717 filter_mb_mbaff_edgev ( h, img_cr + 8*uvlinesize, uvlinesize, bS+4, 1, rqp[1], 1 );
7c9079ab 718 }else{
a4f6be86
JGG
719 filter_mb_mbaff_edgecv( h, img_cb, uvlinesize, bS , 1, bqp[0], 1 );
720 filter_mb_mbaff_edgecv( h, img_cb + 4*uvlinesize, uvlinesize, bS+4, 1, bqp[1], 1 );
721 filter_mb_mbaff_edgecv( h, img_cr, uvlinesize, bS , 1, rqp[0], 1 );
722 filter_mb_mbaff_edgecv( h, img_cr + 4*uvlinesize, uvlinesize, bS+4, 1, rqp[1], 1 );
7c9079ab 723 }
7b442ad9 724 }
bffe82f5 725 }else{
a4f6be86
JGG
726 filter_mb_mbaff_edgev ( h, img_y , 2* linesize, bS , 2, qp [0], 1 );
727 filter_mb_mbaff_edgev ( h, img_y + linesize, 2* linesize, bS+1, 2, qp [1], 1 );
7b442ad9 728 if (chroma){
7c9079ab 729 if (CHROMA444) {
a4f6be86
JGG
730 filter_mb_mbaff_edgev ( h, img_cb, 2*uvlinesize, bS , 2, bqp[0], 1 );
731 filter_mb_mbaff_edgev ( h, img_cb + uvlinesize, 2*uvlinesize, bS+1, 2, bqp[1], 1 );
732 filter_mb_mbaff_edgev ( h, img_cr, 2*uvlinesize, bS , 2, rqp[0], 1 );
733 filter_mb_mbaff_edgev ( h, img_cr + uvlinesize, 2*uvlinesize, bS+1, 2, rqp[1], 1 );
7c9079ab 734 }else{
a4f6be86
JGG
735 filter_mb_mbaff_edgecv( h, img_cb, 2*uvlinesize, bS , 2, bqp[0], 1 );
736 filter_mb_mbaff_edgecv( h, img_cb + uvlinesize, 2*uvlinesize, bS+1, 2, bqp[1], 1 );
737 filter_mb_mbaff_edgecv( h, img_cr, 2*uvlinesize, bS , 2, rqp[0], 1 );
738 filter_mb_mbaff_edgecv( h, img_cr + uvlinesize, 2*uvlinesize, bS+1, 2, rqp[1], 1 );
7c9079ab 739 }
7b442ad9 740 }
bffe82f5 741 }
082cf971
MN
742 }
743
744#if CONFIG_SMALL
745 for( dir = 0; dir < 2; dir++ )
7b442ad9 746 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, chroma, CHROMA444, dir);
082cf971 747#else
7b442ad9
JGG
748 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, chroma, CHROMA444, 0);
749 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, chroma, CHROMA444, 1);
082cf971
MN
750#endif
751}