Commit | Line | Data |
---|---|---|
1457ab52 MN |
1 | /* |
2 | * Motion estimation | |
3 | * Copyright (c) 2002 Michael Niedermayer | |
4 | * | |
5 | * This library is free software; you can redistribute it and/or | |
6 | * modify it under the terms of the GNU Lesser General Public | |
7 | * License as published by the Free Software Foundation; either | |
8 | * version 2 of the License, or (at your option) any later version. | |
9 | * | |
10 | * This library is distributed in the hope that it will be useful, | |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * Lesser General Public License for more details. | |
14 | * | |
15 | * You should have received a copy of the GNU Lesser General Public | |
16 | * License along with this library; if not, write to the Free Software | |
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
18 | * | |
19 | */ | |
983e3246 MN |
20 | |
21 | /** | |
22 | * @file motion_est_template.c | |
23 | * Motion estimation template. | |
24 | */ | |
bb198e19 | 25 | //FIXME ref2_y next_pic? |
1457ab52 MN |
26 | //let's hope gcc will remove the unused vars ... (gcc 3.2.2 seems to do it ...) |
27 | //Note, the last line is there to kill these ugly unused var warnings | |
/*
 * LOAD_COMMON: declares the locals shared by the search routines of this template.
 *
 * Expects `s`, `src_data`, `ref_data` and `size` to be visible where it is expanded.
 * It binds:
 *   - score_map, time_pp, time_pb and the search window xmin/ymin/xmax/ymax from `s`,
 *   - the three source planes (src_y/u/v) and reference planes (ref_y/u/v),
 *   - pointers into the dsp function tables, indexed by `size` (chroma uses size+1).
 * The hpel_put / chroma_hpel_put / qpel_put pointers are taken from the
 * "no_rnd" tables when s->no_rounding is set, otherwise from the regular ones.
 * `unu` sums the declared locals only so that the compiler does not warn about
 * those an individual caller leaves unused.
 * The expansion ends with an if/else statement, so anything following it in a
 * caller comes after executable code.
 */
bb198e19 | 28 | #define LOAD_COMMON\ |
1457ab52 | 29 | uint32_t * const score_map= s->me.score_map;\ |
1457ab52 MN |
30 | const int time_pp= s->pp_time;\ |
31 | const int time_pb= s->pb_time;\ | |
bb198e19 MN |
32 | const int xmin= s->me.xmin;\ |
33 | const int ymin= s->me.ymin;\ | |
34 | const int xmax= s->me.xmax;\ | |
35 | const int ymax= s->me.ymax;\ | |
36 | uint8_t * const src_y= src_data[0];\ | |
37 | uint8_t * const src_u= src_data[1];\ | |
38 | uint8_t * const src_v= src_data[2];\ | |
39 | uint8_t * const ref_y= ref_data[0];\ | |
40 | uint8_t * const ref_u= ref_data[1];\ | |
41 | uint8_t * const ref_v= ref_data[2];\ | |
1457ab52 MN |
42 | op_pixels_func (*hpel_put)[4];\ |
43 | op_pixels_func (*hpel_avg)[4]= &s->dsp.avg_pixels_tab[size];\ | |
44 | op_pixels_func (*chroma_hpel_put)[4];\ | |
45 | qpel_mc_func (*qpel_put)[16];\ | |
46 | qpel_mc_func (*qpel_avg)[16]= &s->dsp.avg_qpel_pixels_tab[size];\ | |
3db320ea | 47 | const __attribute__((unused)) int unu= time_pp + time_pb + (size_t)src_u + (size_t)src_v + (size_t)ref_u + (size_t)ref_v\ |
bb198e19 MN |
48 | + (size_t)hpel_avg + (size_t)qpel_avg + (size_t)score_map\ |
49 | + xmin + xmax + ymin + ymax;\ | |
1457ab52 MN |
50 | if(s->no_rounding /*FIXME b_type*/){\ |
51 | hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];\ | |
52 | chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];\ | |
53 | qpel_put= &s->dsp.put_no_rnd_qpel_pixels_tab[size];\ | |
54 | }else{\ | |
55 | hpel_put=& s->dsp.put_pixels_tab[size];\ | |
56 | chroma_hpel_put= &s->dsp.put_pixels_tab[size+1];\ | |
57 | qpel_put= &s->dsp.put_qpel_pixels_tab[size];\ | |
58 | } | |
59 | ||
60 | ||
61 | #ifdef CMP_HPEL | |
62 | ||
/*
 * CHECK_HALF_MV(dx, dy, x, y): score one half-pel candidate.
 *
 * (x, y) is the integer position and (dx, dy) the half-pel offset; hx/hy are the
 * resulting coordinates in half-pel units. The distortion is computed into `d`
 * by CMP_HPEL, the motion-vector cost (mv_penalty lookups relative to
 * pred_x/pred_y, scaled by penalty_factor) is added, and the result is handed
 * to COPY3_IF_LT together with dmin/bx/by.
 * NOTE(review): COPY3_IF_LT is defined elsewhere; presumably it stores d, hx, hy
 * into dmin, bx, by when d < dmin -- confirm.
 * Relies on these names at the expansion site: d, dmin, bx, by, size,
 * mv_penalty, pred_x, pred_y, penalty_factor.
 */
63 | #define CHECK_HALF_MV(dx, dy, x, y)\ | |
64 | {\ | |
65 | const int hx= 2*(x)+(dx);\ | |
66 | const int hy= 2*(y)+(dy);\ | |
67 | CMP_HPEL(d, dx, dy, x, y, size);\ | |
68 | d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\ | |
69 | COPY3_IF_LT(dmin, d, bx, hx, by, hy)\ | |
70 | } | |
71 | ||
72 | #if 0 | |
/*
 * Disabled (#if 0) exhaustive variant of the half-pel refinement: when the
 * integer vector lies strictly inside the search window it tests all eight
 * half-pel neighbours with CHECK_HALF_MV and writes the best one back in
 * half-pel units; otherwise it just writes 2*mx, 2*my.
 *
 * NOTE(review): this variant is out of sync with the rest of the file:
 *   - `n` is used for xx/yy but is not declared in this block,
 *   - LOAD_COMMON reads src_data[], which is not a parameter here,
 *   - the final assert joins its four range tests with `||`, so it can hardly
 *     ever fail; the enabled variant uses `&&`.
 * Bring it in line with the enabled variant before re-enabling it.
 */
73 | static int RENAME(hpel_motion_search)(MpegEncContext * s, | |
74 | int *mx_ptr, int *my_ptr, int dmin, | |
bb198e19 MN |
75 | int pred_x, int pred_y, uint8_t *ref_data[3], |
76 | int size, uint8_t * const mv_penalty) | |
1457ab52 | 77 | { |
1457ab52 MN |
78 | const int xx = 16 * s->mb_x + 8*(n&1); |
79 | const int yy = 16 * s->mb_y + 8*(n>>1); | |
80 | const int mx = *mx_ptr; | |
81 | const int my = *my_ptr; | |
b07a5980 | 82 | const int penalty_factor= s->me.sub_penalty_factor; |
1457ab52 | 83 | |
bb198e19 | 84 | LOAD_COMMON |
1457ab52 MN |
85 | |
86 | // INIT; | |
87 | //FIXME factorize | |
88 | me_cmp_func cmp, chroma_cmp, cmp_sub, chroma_cmp_sub; | |
89 | ||
/* same selection LOAD_COMMON already performs for hpel_put / chroma_hpel_put */
90 | if(s->no_rounding /*FIXME b_type*/){ | |
91 | hpel_put= &s->dsp.put_no_rnd_pixels_tab[size]; | |
92 | chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1]; | |
93 | }else{ | |
94 | hpel_put=& s->dsp.put_pixels_tab[size]; | |
95 | chroma_hpel_put= &s->dsp.put_pixels_tab[size+1]; | |
96 | } | |
97 | cmp= s->dsp.me_cmp[size]; | |
98 | chroma_cmp= s->dsp.me_cmp[size+1]; | |
99 | cmp_sub= s->dsp.me_sub_cmp[size]; | |
100 | chroma_cmp_sub= s->dsp.me_sub_cmp[size+1]; | |
101 | ||
102 | if(s->me.skip){ //FIXME somehow move up (benchmark) | |
103 | *mx_ptr = 0; | |
104 | *my_ptr = 0; | |
105 | return dmin; | |
106 | } | |
107 | ||
/* the integer search used a different compare function: rescore the centre */
108 | if(s->avctx->me_cmp != s->avctx->me_sub_cmp){ | |
109 | CMP_HPEL(dmin, 0, 0, mx, my, size); | |
110 | if(mx || my) | |
111 | dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor; | |
112 | } | |
113 | ||
114 | if (mx > xmin && mx < xmax && | |
115 | my > ymin && my < ymax) { | |
116 | int bx=2*mx, by=2*my; | |
117 | int d= dmin; | |
118 | ||
119 | CHECK_HALF_MV(1, 1, mx-1, my-1) | |
120 | CHECK_HALF_MV(0, 1, mx , my-1) | |
121 | CHECK_HALF_MV(1, 1, mx , my-1) | |
122 | CHECK_HALF_MV(1, 0, mx-1, my ) | |
123 | CHECK_HALF_MV(1, 0, mx , my ) | |
124 | CHECK_HALF_MV(1, 1, mx-1, my ) | |
125 | CHECK_HALF_MV(0, 1, mx , my ) | |
126 | CHECK_HALF_MV(1, 1, mx , my ) | |
127 | ||
b07a5980 | 128 | assert(bx >= xmin*2 || bx <= xmax*2 || by >= ymin*2 || by <= ymax*2); |
1457ab52 MN |
129 | |
130 | *mx_ptr = bx; | |
131 | *my_ptr = by; | |
132 | }else{ | |
133 | *mx_ptr =2*mx; | |
134 | *my_ptr =2*my; | |
135 | } | |
136 | ||
137 | return dmin; | |
138 | } | |
139 | ||
140 | #else | |
/*
 * Half-pel refinement of an integer motion vector (enabled variant).
 *
 * In:  *mx_ptr, *my_ptr  integer-pel vector found by the full-pel search
 *      dmin              its score
 *      pred_x, pred_y    predictor subtracted before the mv_penalty lookup
 *      src_data/ref_data, stride, uvstride, size, h
 *                        not used directly here; picked up by LOAD_COMMON and
 *                        (presumably) by CMP_HPEL -- confirm against its definition
 * Out: *mx_ptr, *my_ptr  chosen vector in half-pel units (2*mx, 2*my if nothing
 *                        better was found or the vector sits on the window edge)
 * Returns the score of the chosen vector.
 *
 * If s->me.skip is set, the vector is forced to (0,0) and dmin is returned
 * unchanged. Instead of testing all eight half-pel neighbours, the scores of
 * the four integer neighbours (t, l, r, b: score_map value plus vector cost)
 * are compared to decide which candidates are handed to CHECK_HALF_MV.
 */
141 | static int RENAME(hpel_motion_search)(MpegEncContext * s, | |
142 | int *mx_ptr, int *my_ptr, int dmin, | |
bb198e19 MN |
143 | int pred_x, int pred_y, uint8_t *src_data[3], |
144 | uint8_t *ref_data[3], int stride, int uvstride, | |
145 | int size, int h, uint8_t * const mv_penalty) | |
1457ab52 | 146 | { |
1457ab52 MN |
147 | const int mx = *mx_ptr; |
148 | const int my = *my_ptr; | |
149 | const int penalty_factor= s->me.sub_penalty_factor; | |
150 | me_cmp_func cmp_sub, chroma_cmp_sub; | |
67725183 | 151 | int bx=2*mx, by=2*my; |
1457ab52 | 152 | |
bb198e19 | 153 | LOAD_COMMON |
1457ab52 MN |
154 | |
155 | //FIXME factorize | |
156 | ||
157 | cmp_sub= s->dsp.me_sub_cmp[size]; | |
158 | chroma_cmp_sub= s->dsp.me_sub_cmp[size+1]; | |
159 | ||
160 | if(s->me.skip){ //FIXME move out of hpel? | |
161 | *mx_ptr = 0; | |
162 | *my_ptr = 0; | |
163 | return dmin; | |
164 | } | |
165 | ||
/* the sub-pel compare function differs from the full-pel one: rescore the
   centre so that dmin is comparable with the half-pel candidates */
166 | if(s->avctx->me_cmp != s->avctx->me_sub_cmp){ | |
167 | CMP_HPEL(dmin, 0, 0, mx, my, size); | |
1d0eab1d | 168 | if(mx || my || size>0) |
1457ab52 MN |
169 | dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor; |
170 | } | |
171 | ||
/* refine only when all four integer neighbours lie inside the search window */
172 | if (mx > xmin && mx < xmax && | |
173 | my > ymin && my < ymax) { | |
1457ab52 MN |
174 | int d= dmin; |
175 | const int index= (my<<ME_MAP_SHIFT) + mx; | |
/* t/l/r/b: stored score of the top/left/right/bottom integer neighbour plus
   its vector cost, weighted with the full-pel penalty factor */
176 | const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] | |
b07a5980 | 177 | + (mv_penalty[bx - pred_x] + mv_penalty[by-2 - pred_y])*s->me.penalty_factor; |
1457ab52 | 178 | const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)] |
b07a5980 | 179 | + (mv_penalty[bx-2 - pred_x] + mv_penalty[by - pred_y])*s->me.penalty_factor; |
1457ab52 | 180 | const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)] |
b07a5980 | 181 | + (mv_penalty[bx+2 - pred_x] + mv_penalty[by - pred_y])*s->me.penalty_factor; |
1457ab52 | 182 | const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] |
b07a5980 MN |
183 | + (mv_penalty[bx - pred_x] + mv_penalty[by+2 - pred_y])*s->me.penalty_factor; |
184 | ||
/* debug check: each neighbour's map slot must hold the key built from that
   position and map_generation, i.e. the score_map values read above belong
   to these positions */
67725183 | 185 | #if 1 |
b07a5980 MN |
186 | int key; |
187 | int map_generation= s->me.map_generation; | |
802f454e | 188 | #ifndef NDEBUG |
b07a5980 | 189 | uint32_t *map= s->me.map; |
802f454e | 190 | #endif |
b07a5980 MN |
191 | key= ((my-1)<<ME_MAP_MV_BITS) + (mx) + map_generation; |
192 | assert(map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key); | |
193 | key= ((my+1)<<ME_MAP_MV_BITS) + (mx) + map_generation; | |
194 | assert(map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key); | |
195 | key= ((my)<<ME_MAP_MV_BITS) + (mx+1) + map_generation; | |
196 | assert(map[(index+1)&(ME_MAP_SIZE-1)] == key); | |
197 | key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation; | |
198 | assert(map[(index-1)&(ME_MAP_SIZE-1)] == key); | |
199 | #endif | |
/* choose the half-pel candidates from the ordering of the neighbour scores;
   the order of the CHECK_HALF_MV calls matters because each one may update
   dmin/bx/by */
1457ab52 MN |
200 | if(t<=b){ |
201 | CHECK_HALF_MV(0, 1, mx ,my-1) | |
202 | if(l<=r){ | |
203 | CHECK_HALF_MV(1, 1, mx-1, my-1) | |
204 | if(t+r<=b+l){ | |
205 | CHECK_HALF_MV(1, 1, mx , my-1) | |
206 | }else{ | |
207 | CHECK_HALF_MV(1, 1, mx-1, my ) | |
208 | } | |
209 | CHECK_HALF_MV(1, 0, mx-1, my ) | |
210 | }else{ | |
211 | CHECK_HALF_MV(1, 1, mx , my-1) | |
212 | if(t+l<=b+r){ | |
213 | CHECK_HALF_MV(1, 1, mx-1, my-1) | |
214 | }else{ | |
215 | CHECK_HALF_MV(1, 1, mx , my ) | |
216 | } | |
217 | CHECK_HALF_MV(1, 0, mx , my ) | |
218 | } | |
219 | }else{ | |
220 | if(l<=r){ | |
221 | if(t+l<=b+r){ | |
222 | CHECK_HALF_MV(1, 1, mx-1, my-1) | |
223 | }else{ | |
224 | CHECK_HALF_MV(1, 1, mx , my ) | |
225 | } | |
226 | CHECK_HALF_MV(1, 0, mx-1, my) | |
227 | CHECK_HALF_MV(1, 1, mx-1, my) | |
228 | }else{ | |
229 | if(t+r<=b+l){ | |
230 | CHECK_HALF_MV(1, 1, mx , my-1) | |
231 | }else{ | |
232 | CHECK_HALF_MV(1, 1, mx-1, my) | |
233 | } | |
234 | CHECK_HALF_MV(1, 0, mx , my) | |
235 | CHECK_HALF_MV(1, 1, mx , my) | |
236 | } | |
237 | CHECK_HALF_MV(0, 1, mx , my) | |
238 | } | |
239 | assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2); | |
1457ab52 MN |
240 | } |
241 | ||
/* bx/by still hold 2*mx, 2*my when the block above was skipped */
67725183 MN |
242 | *mx_ptr = bx; |
243 | *my_ptr = by; | |
244 | ||
1457ab52 MN |
245 | return dmin; |
246 | } | |
247 | #endif | |
248 | ||
bb198e19 MN |
/*
 * Score the half-pel vector (mx, my) using the macroblock compare functions
 * (s->dsp.mb_cmp) instead of the sub-pel search ones.
 *
 * mx/my are in half-pel units: the low bit is passed to CMP_HPEL as the
 * fractional offset and mx>>1, my>>1 as the integer position. size is fixed
 * to 0 and h to 16. The vector cost (mv_penalty relative to pred_x/pred_y,
 * scaled by s->me.mb_penalty_factor) is added unless mx and my are both 0.
 * The asserts require that s->me.skip is not set and that me_sub_cmp and
 * mb_cmp differ.
 * Returns the resulting score.
 * NOTE(review): cmp_sub / chroma_cmp_sub are not referenced by name below;
 * presumably CMP_HPEL consumes them -- confirm against its definition.
 */
249 | static int RENAME(hpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pred_x, int pred_y, uint8_t *src_data[3], |
250 | uint8_t *ref_data[3], int stride, int uvstride, | |
30952237 | 251 | uint8_t * const mv_penalty) |
67725183 MN |
252 | { |
253 | // const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp; | |
254 | const int size= 0; | |
bb198e19 | 255 | const int h= 16; |
67725183 | 256 | const int penalty_factor= s->me.mb_penalty_factor; |
67725183 MN |
257 | me_cmp_func cmp_sub, chroma_cmp_sub; |
258 | int d; | |
259 | ||
bb198e19 | 260 | LOAD_COMMON |
67725183 MN |
261 | |
262 | //FIXME factorize | |
263 | ||
264 | cmp_sub= s->dsp.mb_cmp[size]; | |
265 | chroma_cmp_sub= s->dsp.mb_cmp[size+1]; | |
266 | ||
267 | assert(!s->me.skip); | |
268 | assert(s->avctx->me_sub_cmp != s->avctx->mb_cmp); | |
269 | ||
270 | CMP_HPEL(d, mx&1, my&1, mx>>1, my>>1, size); | |
271 | //FIXME check cbp before adding penalty for (0,0) vector | |
272 | if(mx || my || size>0) | |
273 | d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor; | |
274 | ||
275 | return d; | |
276 | } | |
277 | ||
1457ab52 MN |
278 | #endif /* CMP_HPEL */ |
279 | ||
67725183 MN |
280 | |
281 | ||
1457ab52 MN |
282 | #ifdef CMP_QPEL |
283 | ||
/*
 * CHECK_QUARTER_MV(dx, dy, x, y): score one quarter-pel candidate.
 *
 * (x, y) is the integer position and (dx, dy) the quarter-pel offset; hx/hy are
 * the resulting coordinates in quarter-pel units. The distortion is computed
 * into `d` by CMP_QPEL, the vector cost (mv_penalty relative to pred_x/pred_y,
 * scaled by penalty_factor) is added, and the result is handed to COPY3_IF_LT
 * together with dmin/bx/by.
 * NOTE(review): COPY3_IF_LT is defined elsewhere; presumably it stores d, hx, hy
 * into dmin, bx, by when d < dmin -- confirm.
 * Relies on these names at the expansion site: d, dmin, bx, by, size,
 * mv_penalty, pred_x, pred_y, penalty_factor.
 */
284 | #define CHECK_QUARTER_MV(dx, dy, x, y)\ | |
285 | {\ | |
286 | const int hx= 4*(x)+(dx);\ | |
287 | const int hy= 4*(y)+(dy);\ | |
288 | CMP_QPEL(d, dx, dy, x, y, size);\ | |
289 | d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\ | |
290 | COPY3_IF_LT(dmin, d, bx, hx, by, hy)\ | |
291 | } | |
292 | ||
293 | static int RENAME(qpel_motion_search)(MpegEncContext * s, | |
294 | int *mx_ptr, int *my_ptr, int dmin, | |
bb198e19 MN |
295 | int pred_x, int pred_y, uint8_t *src_data[3], |
296 | uint8_t *ref_data[3], int stride, int uvstride, | |
297 | int size, int h, uint8_t * const mv_penalty) | |
1457ab52 | 298 | { |
1457ab52 MN |
299 | const int mx = *mx_ptr; |
300 | const int my = *my_ptr; | |
301 | const int penalty_factor= s->me.sub_penalty_factor; | |
302 | const int map_generation= s->me.map_generation; | |
826f429a | 303 | const int subpel_quality= s->avctx->me_subpel_quality; |
1457ab52 MN |
304 | uint32_t *map= s->me.map; |
305 | me_cmp_func cmp, chroma_cmp; | |
306 | me_cmp_func cmp_sub, chroma_cmp_sub; | |
307 | ||
bb198e19 | 308 | LOAD_COMMON |
1457ab52 MN |
309 | |
310 | cmp= s->dsp.me_cmp[size]; | |
311 | chroma_cmp= s->dsp.me_cmp[size+1]; //factorize FIXME | |
312 | //FIXME factorize | |
313 | ||
314 | cmp_sub= s->dsp.me_sub_cmp[size]; | |
315 | chroma_cmp_sub= s->dsp.me_sub_cmp[size+1]; | |
316 | ||
317 | if(s->me.skip){ //FIXME somehow move up (benchmark) | |
318 | *mx_ptr = 0; | |
319 | *my_ptr = 0; | |
320 | return dmin; | |
321 | } | |
322 | ||
323 | if(s->avctx->me_cmp != s->avctx->me_sub_cmp){ | |
324 | CMP_QPEL(dmin, 0, 0, mx, my, size); | |
1d0eab1d | 325 | if(mx || my || size>0) |
1457ab52 MN |
326 | dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor; |
327 | } | |
328 | ||
329 | if (mx > xmin && mx < xmax && | |
330 | my > ymin && my < ymax) { | |
331 | int bx=4*mx, by=4*my; | |
332 | int d= dmin; | |
333 | int i, nx, ny; | |
334 | const int index= (my<<ME_MAP_SHIFT) + mx; | |
335 | const int t= score_map[(index-(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)]; | |
336 | const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)]; | |
337 | const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)]; | |
338 | const int b= score_map[(index+(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)]; | |
339 | const int c= score_map[(index )&(ME_MAP_SIZE-1)]; | |
340 | int best[8]; | |
341 | int best_pos[8][2]; | |
342 | ||
343 | memset(best, 64, sizeof(int)*8); | |
344 | #if 1 | |
826f429a | 345 | if(s->me.dia_size>=2){ |
1457ab52 MN |
346 | const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)]; |
347 | const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)]; | |
348 | const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)]; | |
349 | const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)]; | |
350 | ||
351 | for(ny= -3; ny <= 3; ny++){ | |
352 | for(nx= -3; nx <= 3; nx++){ | |
353 | const int t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t; | |
354 | const int c2= nx*nx*( r + l - 2*c) + 4*nx*( r- l) + 32*c; | |
355 | const int b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b; | |
356 | int score= ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2; | |
357 | int i; | |
358 | ||
359 | if((nx&3)==0 && (ny&3)==0) continue; | |
360 | ||
361 | score += 1024*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor; | |
362 | ||
363 | // if(nx&1) score-=1024*s->me.penalty_factor; | |
364 | // if(ny&1) score-=1024*s->me.penalty_factor; | |
365 | ||
366 | for(i=0; i<8; i++){ | |
367 | if(score < best[i]){ | |
368 | memmove(&best[i+1], &best[i], sizeof(int)*(7-i)); | |
369 | memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i)); | |
370 | best[i]= score; | |
371 | best_pos[i][0]= nx + 4*mx; | |
372 | best_pos[i][1]= ny + 4*my; | |
373 | break; | |
374 | } | |
375 | } | |
376 | } | |
377 | } | |
378 | }else{ | |
379 | int tl; | |
380 | const int cx = 4*(r - l); | |
381 | const int cx2= r + l - 2*c; | |
382 | const int cy = 4*(b - t); | |
383 | const int cy2= b + t - 2*c; | |
384 | int cxy; | |
385 | ||
386 | if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME | |
387 | tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)]; | |
388 | }else{ | |
389 | CMP(tl, mx-1, my-1, size); //FIXME wrong if chroma me is different | |
390 | } | |
391 | ||
392 | cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c; | |
393 | ||
394 | assert(16*cx2 + 4*cx + 32*c == 32*r); | |
395 | assert(16*cx2 - 4*cx + 32*c == 32*l); | |
396 | assert(16*cy2 + 4*cy + 32*c == 32*b); | |
397 | assert(16*cy2 - 4*cy + 32*c == 32*t); | |
398 | assert(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl); | |
399 | ||
400 | for(ny= -3; ny <= 3; ny++){ | |
401 | for(nx= -3; nx <= 3; nx++){ | |
402 | int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor | |
403 | int i; | |
404 | ||
405 | if((nx&3)==0 && (ny&3)==0) continue; | |
406 | ||
407 | score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor; | |
408 | // if(nx&1) score-=32*s->me.penalty_factor; | |
409 | // if(ny&1) score-=32*s->me.penalty_factor; | |
410 | ||
411 | for(i=0; i<8; i++){ | |
412 | if(score < best[i]){ | |
413 | memmove(&best[i+1], &best[i], sizeof(int)*(7-i)); | |
414 | memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i)); | |
415 | best[i]= score; | |
416 | best_pos[i][0]= nx + 4*mx; | |
417 | best_pos[i][1]= ny + 4*my; | |
418 | break; | |
419 | } | |
420 | } | |
421 | } | |
422 | } | |
423 | } | |
826f429a | 424 | for(i=0; i<subpel_quality; i++){ |
1457ab52 MN |
425 | nx= best_pos[i][0]; |
426 | ny= best_pos[i][1]; | |
427 | CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2) | |
428 | } | |
826f429a | 429 | |
1457ab52 | 430 | #if 0 |
826f429a MN |
431 | const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)]; |
432 | const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)]; | |
433 | const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)]; | |
434 | const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)]; | |
435 | // if(l < r && l < t && l < b && l < tl && l < bl && l < tr && l < br && bl < tl){ | |
436 | if(tl<br){ | |
437 | ||
438 | // nx= FFMAX(4*mx - bx, bx - 4*mx); | |
439 | // ny= FFMAX(4*my - by, by - 4*my); | |
1457ab52 | 440 | |
826f429a MN |
441 | static int stats[7][7], count; |
442 | count++; | |
443 | stats[4*mx - bx + 3][4*my - by + 3]++; | |
444 | if(256*256*256*64 % count ==0){ | |
445 | for(i=0; i<49; i++){ | |
446 | if((i%7)==0) printf("\n"); | |
1457ab52 MN |
447 | printf("%6d ", stats[0][i]); |
448 | } | |
449 | printf("\n"); | |
450 | } | |
826f429a | 451 | } |
1457ab52 MN |
452 | #endif |
453 | #else | |
454 | ||
455 | CHECK_QUARTER_MV(2, 2, mx-1, my-1) | |
456 | CHECK_QUARTER_MV(0, 2, mx , my-1) | |
457 | CHECK_QUARTER_MV(2, 2, mx , my-1) | |
458 | CHECK_QUARTER_MV(2, 0, mx , my ) | |
459 | CHECK_QUARTER_MV(2, 2, mx , my ) | |
460 | CHECK_QUARTER_MV(0, 2, mx , my ) | |
461 | CHECK_QUARTER_MV(2, 2, mx-1, my ) | |
462 | CHECK_QUARTER_MV(2, 0, mx-1, my ) | |
463 | ||
464 | nx= bx; | |
465 | ny= by; | |
466 | ||
467 | for(i=0; i<8; i++){ | |
468 | int ox[8]= {0, 1, 1, 1, 0,-1,-1,-1}; | |
469 | int oy[8]= {1, 1, 0,-1,-1,-1, 0, 1}; | |
470 | CHECK_QUARTER_MV((nx + ox[i])&3, (ny + oy[i])&3, (nx + ox[i])>>2, (ny + oy[i])>>2) | |
471 | } | |
472 | #endif | |
473 | #if 0 | |
474 | //outer ring | |
475 | CHECK_QUARTER_MV(1, 3, mx-1, my-1) | |
476 | CHECK_QUARTER_MV(1, 2, mx-1, my-1) | |
477 | CHECK_QUARTER_MV(1, 1, mx-1, my-1) | |
478 | CHECK_QUARTER_MV(2, 1, mx-1, my-1) | |
479 | CHECK_QUARTER_MV(3, 1, mx-1, my-1) | |
480 | CHECK_QUARTER_MV(0, 1, mx , my-1) | |
481 | CHECK_QUARTER_MV(1, 1, mx , my-1) | |
482 | CHECK_QUARTER_MV(2, 1, mx , my-1) | |
483 | CHECK_QUARTER_MV(3, 1, mx , my-1) | |
484 | CHECK_QUARTER_MV(3, 2, mx , my-1) | |
485 | CHECK_QUARTER_MV(3, 3, mx , my-1) | |
486 | CHECK_QUARTER_MV(3, 0, mx , my ) | |
487 | CHECK_QUARTER_MV(3, 1, mx , my ) | |
488 | CHECK_QUARTER_MV(3, 2, mx , my ) | |
489 | CHECK_QUARTER_MV(3, 3, mx , my ) | |
490 | CHECK_QUARTER_MV(2, 3, mx , my ) | |
491 | CHECK_QUARTER_MV(1, 3, mx , my ) | |
492 | CHECK_QUARTER_MV(0, 3, mx , my ) | |
493 | CHECK_QUARTER_MV(3, 3, mx-1, my ) | |
494 | CHECK_QUARTER_MV(2, 3, mx-1, my ) | |
495 | CHECK_QUARTER_MV(1, 3, mx-1, my ) | |
496 | CHECK_QUARTER_MV(1, 2, mx-1, my ) | |
497 | CHECK_QUARTER_MV(1, 1, mx-1, my ) | |
498 | CHECK_QUARTER_MV(1, 0, mx-1, my ) | |
499 | #endif | |
500 | assert(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4); | |
501 | ||
502 | *mx_ptr = bx; | |
503 | *my_ptr = by; | |
504 | }else{ | |
505 | *mx_ptr =4*mx; | |
506 | *my_ptr =4*my; | |
507 | } | |
508 | ||
509 | return dmin; | |
510 | } | |
511 | ||
bb198e19 MN |
/*
 * Score the quarter-pel vector (mx, my) using the macroblock compare functions
 * (s->dsp.mb_cmp) instead of the sub-pel search ones.
 *
 * mx/my are in quarter-pel units: the low two bits are passed to CMP_QPEL as
 * the fractional offset and mx>>2, my>>2 as the integer position. size is
 * fixed to 0 and h to 16. The vector cost (mv_penalty relative to
 * pred_x/pred_y, scaled by s->me.mb_penalty_factor) is added unless mx and my
 * are both 0.
 * The asserts require that s->me.skip is not set and that me_sub_cmp and
 * mb_cmp differ.
 * Returns the resulting score.
 * NOTE(review): cmp_sub / chroma_cmp_sub are not referenced by name below;
 * presumably CMP_QPEL consumes them -- confirm against its definition.
 */
512 | static int RENAME(qpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pred_x, int pred_y, uint8_t *src_data[3], |
513 | uint8_t *ref_data[3], int stride, int uvstride, | |
30952237 | 514 | uint8_t * const mv_penalty) |
67725183 MN |
515 | { |
516 | const int size= 0; | |
bb198e19 | 517 | const int h= 16; |
67725183 | 518 | const int penalty_factor= s->me.mb_penalty_factor; |
67725183 MN |
519 | me_cmp_func cmp_sub, chroma_cmp_sub; |
520 | int d; | |
521 | ||
bb198e19 | 522 | LOAD_COMMON |
67725183 MN |
523 | |
524 | //FIXME factorize | |
525 | ||
526 | cmp_sub= s->dsp.mb_cmp[size]; | |
527 | chroma_cmp_sub= s->dsp.mb_cmp[size+1]; | |
528 | ||
529 | assert(!s->me.skip); | |
530 | assert(s->avctx->me_sub_cmp != s->avctx->mb_cmp); | |
531 | ||
532 | CMP_QPEL(d, mx&3, my&3, mx>>2, my>>2, size); | |
533 | //FIXME check cbp before adding penalty for (0,0) vector | |
534 | if(mx || my || size>0) | |
535 | d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor; | |
536 | ||
537 | return d; | |
538 | } | |
539 | ||
540 | ||
1457ab52 MN |
541 | #endif /* CMP_QPEL */ |
542 | ||
/*
 * CHECK_MV(x, y): score the integer position (x, y) unless its map slot
 * already holds the key for this position.
 *
 * `key` combines y, x and map_generation; `index` is the slot of (x, y) in
 * map / score_map. When map[index] differs from key, the position is measured
 * with CMP, the key and the raw score (without vector cost) are stored, the
 * vector cost is added to d, and the result is handed to COPY3_IF_LT together
 * with dmin, best[0], best[1].
 * NOTE(review): COPY3_IF_LT is defined elsewhere; presumably it stores d, x, y
 * into dmin, best[0], best[1] when d < dmin -- confirm.
 * Relies on these names at the expansion site: d, dmin, best, map, score_map,
 * map_generation, size, shift, mv_penalty, pred_x, pred_y, penalty_factor.
 */
543 | #define CHECK_MV(x,y)\ | |
544 | {\ | |
545 | const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\ | |
546 | const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\ | |
b07a5980 | 547 | /*printf("check_mv %d %d\n", x, y);*/\ |
1457ab52 MN |
548 | if(map[index]!=key){\ |
549 | CMP(d, x, y, size);\ | |
550 | map[index]= key;\ | |
551 | score_map[index]= d;\ | |
552 | d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\ | |
b07a5980 | 553 | /*printf("score:%d\n", d);*/\ |
1457ab52 MN |
554 | COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\ |
555 | }\ | |
556 | } | |
557 | ||
b07a5980 MN |
558 | #define CHECK_CLIPED_MV(ax,ay)\ |
559 | {\ | |
560 | const int x= FFMAX(xmin, FFMIN(ax, xmax));\ | |
561 | const int y= FFMAX(ymin, FFMIN(ay, ymax));\ | |
562 | CHECK_MV(x, y)\ | |
563 | } | |
564 | ||
1457ab52 MN |
565 | #define CHECK_MV_DIR(x,y,new_dir)\ |
566 | {\ | |
567 | const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\ | |
568 | const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\ | |
b07a5980 | 569 | /*printf("check_mv_dir %d %d %d\n", x, y, new_dir);*/\ |
1457ab52 MN |
570 | if(map[index]!=key){\ |
571 | CMP(d, x, y, size);\ | |
572 | map[index]= key;\ | |
573 | score_map[index]= d;\ | |
574 | d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\ | |
b07a5980 | 575 | /*printf("score:%d\n", d);*/\ |
1457ab52 MN |
576 | if(d<dmin){\ |
577 | best[0]=x;\ | |
578 | best[1]=y;\ | |
579 | dmin=d;\ | |
580 | next_dir= new_dir;\ | |
581 | }\ | |
582 | }\ | |
583 | } | |
584 | ||
/*
 * check(x, y, S, v): debugging aid. Prints a line when (x, y) lies outside the
 * search window after the window limits have been shifted left by S; `v` is
 * stringified and appended to the format string as a tag.
 * The only uses visible in this part of the file are commented out.
 * The macro body is four bare if-statements, so it must not be used as the
 * body of an unbraced if/else.
 */
585 | #define check(x,y,S,v)\ | |
586 | if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\ | |
587 | if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\ | |
588 | if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\ | |
589 | if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\ | |
590 | ||
591 | ||
/*
 * Small diamond search: starting from best[], repeatedly tests the four
 * integer neighbours of the current best position and moves to whichever one
 * improves dmin, until no neighbour does.
 *
 * best[] is updated in place with the final position; the final dmin is
 * returned. `dir` remembers the direction of the previous step so that the
 * position just left is not tested again, and neighbours outside
 * xmin..xmax / ymin..ymax are never tested.
 * Before the loop, the starting position is scored and entered into the map
 * if its slot does not already hold its key.
 */
592 | static inline int RENAME(small_diamond_search)(MpegEncContext * s, int *best, int dmin, | |
bb198e19 MN |
593 | uint8_t *src_data[3], |
594 | uint8_t *ref_data[3], int stride, int uvstride, | |
1457ab52 | 595 | int const pred_x, int const pred_y, int const penalty_factor, |
bb198e19 MN |
596 | int const shift, |
597 | uint32_t *map, int map_generation, int size, int h, uint8_t * const mv_penalty | |
1457ab52 MN |
598 | ) |
599 | { | |
600 | me_cmp_func cmp, chroma_cmp; | |
601 | int next_dir=-1; | |
bb198e19 | 602 | LOAD_COMMON |
1457ab52 MN |
603 | |
604 | cmp= s->dsp.me_cmp[size]; | |
605 | chroma_cmp= s->dsp.me_cmp[size+1]; | |
606 | ||
b07a5980 MN |
607 | { /* ensure that the best point is in the MAP as h/qpel refinement needs it */ |
608 | const int key= (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation; | |
609 | const int index= ((best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1); | |
610 | if(map[index]!=key){ //this will be executed only very rarely | |
611 | CMP(score_map[index], best[0], best[1], size); | |
612 | map[index]= key; | |
613 | } | |
614 | } | |
615 | ||
1457ab52 MN |
616 | for(;;){ |
617 | int d; | |
618 | const int dir= next_dir; | |
619 | const int x= best[0]; | |
620 | const int y= best[1]; | |
621 | next_dir=-1; | |
622 | ||
623 | //printf("%d", dir); | |
/* directions: 0 = x-1, 1 = y-1, 2 = x+1, 3 = y+1; the opposite of the last
   step is skipped */
624 | if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y , 0) | |
625 | if(dir!=3 && y>ymin) CHECK_MV_DIR(x , y-1, 1) | |
626 | if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y , 2) | |
627 | if(dir!=1 && y<ymax) CHECK_MV_DIR(x , y+1, 3) | |
628 | ||
/* no neighbour improved the score: converged */
629 | if(next_dir==-1){ | |
630 | return dmin; | |
631 | } | |
632 | } | |
633 | } | |
634 | ||
/*
 * "Funny" diamond search: tests points on diamonds of radius 1, 2 and 4
 * around the current best position.
 *
 * The loop runs dia_size over 1..4 and skips values that are not powers of
 * two. A radius is also skipped when the diamond would not fit entirely
 * inside the search window. On each diamond only every second point of each
 * edge is tested (dir advances by 2). Whenever best[] moved, dia_size is
 * reset to 0 so that the next iteration starts again at radius 1 around the
 * new position.
 * best[] is updated in place; the final dmin is returned.
 */
b07a5980 | 635 | static inline int RENAME(funny_diamond_search)(MpegEncContext * s, int *best, int dmin, |
bb198e19 MN |
636 | uint8_t *src_data[3], |
637 | uint8_t *ref_data[3], int stride, int uvstride, | |
b07a5980 | 638 | int const pred_x, int const pred_y, int const penalty_factor, |
bb198e19 MN |
639 | int const shift, |
640 | uint32_t *map, int map_generation, int size, int h, uint8_t * const mv_penalty | |
b07a5980 MN |
641 | ) |
642 | { | |
643 | me_cmp_func cmp, chroma_cmp; | |
644 | int dia_size; | |
bb198e19 | 645 | LOAD_COMMON |
b07a5980 MN |
646 | |
647 | cmp= s->dsp.me_cmp[size]; | |
648 | chroma_cmp= s->dsp.me_cmp[size+1]; | |
649 | ||
650 | for(dia_size=1; dia_size<=4; dia_size++){ | |
651 | int dir; | |
652 | const int x= best[0]; | |
653 | const int y= best[1]; | |
654 | ||
/* only radii that are powers of two are used */
655 | if(dia_size&(dia_size-1)) continue; | |
656 | ||
657 | if( x + dia_size > xmax | |
658 | || x - dia_size < xmin | |
659 | || y + dia_size > ymax | |
660 | || y - dia_size < ymin) | |
661 | continue; | |
662 | ||
663 | for(dir= 0; dir<dia_size; dir+=2){ | |
664 | int d; | |
665 | ||
666 | CHECK_MV(x + dir , y + dia_size - dir); | |
667 | CHECK_MV(x + dia_size - dir, y - dir ); | |
668 | CHECK_MV(x - dir , y - dia_size + dir); | |
669 | CHECK_MV(x - dia_size + dir, y + dir ); | |
670 | } | |
671 | ||
/* best moved: restart at radius 1 (the loop increment turns 0 into 1) */
672 | if(x!=best[0] || y!=best[1]) | |
673 | dia_size=0; | |
674 | #if 0 | |
675 | { | |
676 | int dx, dy, i; | |
677 | static int stats[8*8]; | |
678 | dx= ABS(x-best[0]); | |
679 | dy= ABS(y-best[1]); | |
680 | if(dy>dx){ | |
681 | dx^=dy; dy^=dx; dx^=dy; | |
682 | } | |
683 | stats[dy*8 + dx] ++; | |
684 | if(256*256*256*64 % (stats[0]+1)==0){ | |
685 | for(i=0; i<64; i++){ | |
686 | if((i&7)==0) printf("\n"); | |
687 | printf("%8d ", stats[i]); | |
688 | } | |
689 | printf("\n"); | |
690 | } | |
691 | } | |
692 | #endif | |
693 | } | |
694 | return dmin; | |
695 | } | |
696 | ||
/*
 * SAB_CHECK_MV(ax, ay): score (ax, ay) unless its map slot already holds its
 * key and, if it beats the worst entry of minima[], insert it into that
 * sorted list.
 *
 * On insertion the entries from the insertion point on are shifted down by
 * one (the last one is dropped), the new entry is marked unchecked, `i` is
 * set to -1 and `continue` is executed. The macro must therefore be expanded
 * directly inside a loop over `i`, which then restarts from the first
 * minimum.
 * Relies on these names at the expansion site: d, i, minima, minima_count,
 * map, score_map, map_generation, size, shift, mv_penalty, pred_x, pred_y,
 * penalty_factor.
 */
697 | #define SAB_CHECK_MV(ax,ay)\ | |
698 | {\ | |
699 | const int key= ((ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\ | |
700 | const int index= (((ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\ | |
701 | /*printf("sab check %d %d\n", ax, ay);*/\ | |
702 | if(map[index]!=key){\ | |
703 | CMP(d, ax, ay, size);\ | |
704 | map[index]= key;\ | |
705 | score_map[index]= d;\ | |
706 | d += (mv_penalty[((ax)<<shift)-pred_x] + mv_penalty[((ay)<<shift)-pred_y])*penalty_factor;\ | |
707 | /*printf("score: %d\n", d);*/\ | |
708 | if(d < minima[minima_count-1].height){\ | |
709 | int j=0;\ | |
710 | \ | |
711 | while(d >= minima[j].height) j++;\ | |
712 | \ | |
713 | memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\ | |
714 | \ | |
715 | minima[j].checked= 0;\ | |
716 | minima[j].height= d;\ | |
717 | minima[j].x= ax;\ | |
718 | minima[j].y= ay;\ | |
719 | \ | |
720 | i=-1;\ | |
721 | continue;\ | |
722 | }\ | |
723 | }\ | |
724 | } | |
725 | ||
726 | #define MAX_SAB_SIZE 16 | |
727 | static inline int RENAME(sab_diamond_search)(MpegEncContext * s, int *best, int dmin, | |
bb198e19 MN |
728 | uint8_t *src_data[3], |
729 | uint8_t *ref_data[3], int stride, int uvstride, | |
b07a5980 | 730 | int const pred_x, int const pred_y, int const penalty_factor, |
bb198e19 MN |
731 | int const shift, |
732 | uint32_t *map, int map_generation, int size, int h, uint8_t * const mv_penalty | |
b07a5980 MN |
733 | ) |
734 | { | |
735 | me_cmp_func cmp, chroma_cmp; | |
736 | Minima minima[MAX_SAB_SIZE]; | |
826f429a | 737 | const int minima_count= ABS(s->me.dia_size); |
b07a5980 | 738 | int i, j; |
bb198e19 | 739 | LOAD_COMMON |
b07a5980 MN |
740 | |
741 | cmp= s->dsp.me_cmp[size]; | |
742 | chroma_cmp= s->dsp.me_cmp[size+1]; | |
743 | ||
744 | for(j=i=0; i<ME_MAP_SIZE; i++){ | |
745 | uint32_t key= map[i]; | |
746 | ||
747 | key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1)); | |
748 | ||
749 | if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue; | |
750 | ||
751 | assert(j<MAX_SAB_SIZE); //max j = number of predictors | |
752 | ||
753 | minima[j].height= score_map[i]; | |
754 | minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS; | |
755 | minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1); | |
756 | minima[j].x-= (1<<(ME_MAP_MV_BITS-1)); | |
757 | minima[j].y-= (1<<(ME_MAP_MV_BITS-1)); | |
758 | minima[j].checked=0; | |
759 | if(minima[j].x || minima[j].y) | |
760 | minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor; | |
761 | ||
762 | j++; | |
763 | } | |
764 | ||
765 | qsort(minima, j, sizeof(Minima), minima_cmp); | |
766 | ||
767 | for(; j<minima_count; j++){ | |
768 | minima[j].height=256*256*256*64; | |
769 | minima[j].checked=0; | |
770 | minima[j].x= minima[j].y=0; | |
771 | } | |
772 | ||
773 | for(i=0; i<minima_count; i++){ | |
774 | const int x= minima[i].x; | |
775 | const int y= minima[i].y; | |
776 | int d; | |
777 | ||
778 | if(minima[i].checked) continue; | |
779 | ||
780 | if( x >= xmax || x <= xmin | |
781 | || y >= ymax || y <= ymin) | |
782 | continue; | |
783 | ||
784 | SAB_CHECK_MV(x-1, y) | |
785 | SAB_CHECK_MV(x+1, y) | |
786 | SAB_CHECK_MV(x , y-1) | |
787 | SAB_CHECK_MV(x , y+1) | |
788 | ||
789 | minima[i].checked= 1; | |
790 | } | |
791 | ||
792 | best[0]= minima[0].x; | |
793 | best[1]= minima[0].y; | |
794 | dmin= minima[0].height; | |
795 | ||
796 | if( best[0] < xmax && best[0] > xmin | |
797 | && best[1] < ymax && best[1] > ymin){ | |
798 | int d; | |
799 | //ensure that the refernece samples for hpel refinement are in the map | |
800 | CHECK_MV(best[0]-1, best[1]) | |
801 | CHECK_MV(best[0]+1, best[1]) | |
802 | CHECK_MV(best[0], best[1]-1) | |
803 | CHECK_MV(best[0], best[1]+1) | |
804 | } | |
805 | return dmin; | |
806 | } | |
807 | ||
/*
 * Variable-size diamond search: for each radius from 1 up to s->me.dia_size,
 * tests the points on the four edges of the diamond of that radius around
 * the current best position.
 *
 * For every edge, `start` and `end` are derived from the distance between the
 * centre and the window limits before the points are handed to CHECK_MV.
 * NOTE(review): this looks like clipping so that no tested point leaves
 * xmin..xmax / ymin..ymax -- confirm the bounds.
 * Whenever best[] moved, dia_size is reset to 0 so that the next iteration
 * starts again at radius 1 around the new position.
 * best[] is updated in place; the final dmin is returned.
 */
1457ab52 | 808 | static inline int RENAME(var_diamond_search)(MpegEncContext * s, int *best, int dmin, |
bb198e19 MN |
809 | uint8_t *src_data[3], |
810 | uint8_t *ref_data[3], int stride, int uvstride, | |
1457ab52 | 811 | int const pred_x, int const pred_y, int const penalty_factor, |
bb198e19 MN |
812 | int const shift, |
813 | uint32_t *map, int map_generation, int size, int h, uint8_t * const mv_penalty | |
1457ab52 MN |
814 | ) |
815 | { | |
816 | me_cmp_func cmp, chroma_cmp; | |
b07a5980 | 817 | int dia_size; |
bb198e19 | 818 | LOAD_COMMON |
1457ab52 MN |
819 | |
820 | cmp= s->dsp.me_cmp[size]; | |
821 | chroma_cmp= s->dsp.me_cmp[size+1]; | |
822 | ||
826f429a | 823 | for(dia_size=1; dia_size<=s->me.dia_size; dia_size++){ |
1457ab52 MN |
824 | int dir, start, end; |
825 | const int x= best[0]; | |
826 | const int y= best[1]; | |
827 | ||
/* edge from (x, y+dia_size) towards (x+dia_size, y) */
828 | start= FFMAX(0, y + dia_size - ymax); | |
b07a5980 | 829 | end = FFMIN(dia_size, xmax - x + 1); |
1457ab52 MN |
830 | for(dir= start; dir<end; dir++){ |
831 | int d; | |
832 | ||
833 | //check(x + dir,y + dia_size - dir,0, a0) | |
834 | CHECK_MV(x + dir , y + dia_size - dir); | |
835 | } | |
836 | ||
/* edge from (x+dia_size, y) towards (x, y-dia_size) */
837 | start= FFMAX(0, x + dia_size - xmax); | |
b07a5980 | 838 | end = FFMIN(dia_size, y - ymin + 1); |
1457ab52 MN |
839 | for(dir= start; dir<end; dir++){ |
840 | int d; | |
841 | ||
842 | //check(x + dia_size - dir, y - dir,0, a1) | |
843 | CHECK_MV(x + dia_size - dir, y - dir ); | |
844 | } | |
845 | ||
/* edge from (x, y-dia_size) towards (x-dia_size, y) */
846 | start= FFMAX(0, -y + dia_size + ymin ); | |
b07a5980 | 847 | end = FFMIN(dia_size, x - xmin + 1); |
1457ab52 MN |
848 | for(dir= start; dir<end; dir++){ |
849 | int d; | |
850 | ||
851 | //check(x - dir,y - dia_size + dir,0, a2) | |
852 | CHECK_MV(x - dir , y - dia_size + dir); | |
853 | } | |
854 | ||
/* edge from (x-dia_size, y) towards (x, y+dia_size) */
855 | start= FFMAX(0, -x + dia_size + xmin ); | |
b07a5980 | 856 | end = FFMIN(dia_size, ymax - y + 1); |
1457ab52 MN |
857 | for(dir= start; dir<end; dir++){ |
858 | int d; | |
859 | ||
860 | //check(x - dia_size + dir, y + dir,0, a3) | |
861 | CHECK_MV(x - dia_size + dir, y + dir ); | |
862 | } | |
863 | ||
/* best moved: restart at radius 1 (the loop increment turns 0 into 1) */
864 | if(x!=best[0] || y!=best[1]) | |
865 | dia_size=0; | |
b07a5980 MN |
866 | #if 0 |
867 | { | |
868 | int dx, dy, i; | |
869 | static int stats[8*8]; | |
870 | dx= ABS(x-best[0]); | |
871 | dy= ABS(y-best[1]); | |
872 | stats[dy*8 + dx] ++; | |
873 | if(256*256*256*64 % (stats[0]+1)==0){ | |
874 | for(i=0; i<64; i++){ | |
875 | if((i&7)==0) printf("\n"); | |
876 | printf("%6d ", stats[i]); | |
877 | } | |
878 | printf("\n"); | |
879 | } | |
880 | } | |
881 | #endif | |
1457ab52 MN |
882 | } |
883 | return dmin; | |
884 | } | |
885 | ||
bb198e19 | 886 | static int RENAME(epzs_motion_search)(MpegEncContext * s, |
1457ab52 | 887 | int *mx_ptr, int *my_ptr, |
bb198e19 MN |
888 | int P[10][2], int pred_x, int pred_y, uint8_t *src_data[3], |
889 | uint8_t *ref_data[3], int stride, int uvstride, int16_t (*last_mv)[2], | |
30952237 | 890 | int ref_mv_scale, uint8_t * const mv_penalty) |
1457ab52 MN |
891 | { |
892 | int best[2]={0, 0}; | |
893 | int d, dmin; | |
894 | const int shift= 1+s->quarter_sample; | |
895 | uint32_t *map= s->me.map; | |
896 | int map_generation; | |
897 | const int penalty_factor= s->me.penalty_factor; | |
898 | const int size=0; | |
bb198e19 MN |
899 | const int h=16; |
900 | const int ref_mv_stride= s->mb_stride; //pass as arg FIXME | |
901 | const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv beforepassing FIXME | |
1457ab52 | 902 | me_cmp_func cmp, chroma_cmp; |
bb198e19 | 903 | LOAD_COMMON |
1457ab52 MN |
904 | |
905 | cmp= s->dsp.me_cmp[size]; | |
906 | chroma_cmp= s->dsp.me_cmp[size+1]; | |
907 | ||
908 | map_generation= update_map_generation(s); | |
909 | ||
910 | CMP(dmin, 0, 0, size); | |
911 | map[0]= map_generation; | |
912 | score_map[0]= dmin; | |
913 | ||
914 | /* first line */ | |
f931ff7b | 915 | if (s->mb_y == 0) { |
1457ab52 | 916 | CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) |
b07a5980 MN |
917 | CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, |
918 | (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) | |
1457ab52 MN |
919 | }else{ |
920 | if(dmin<256 && ( P_LEFT[0] |P_LEFT[1] | |
921 | |P_TOP[0] |P_TOP[1] | |
b07a5980 | 922 | |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){ |
1457ab52 MN |
923 | *mx_ptr= 0; |
924 | *my_ptr= 0; | |
925 | s->me.skip=1; | |
926 | return dmin; | |
927 | } | |
928 | CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift) | |
929 | if(dmin>256*2){ | |
b07a5980 MN |
930 | CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, |
931 | (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) | |
1457ab52 MN |
932 | CHECK_MV(P_LEFT[0] >>shift, P_LEFT[1] >>shift) |
933 | CHECK_MV(P_TOP[0] >>shift, P_TOP[1] >>shift) | |
934 | CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift) | |
935 | } | |
936 | } | |
937 | if(dmin>256*4){ | |
f931ff7b MN |
938 | if(s->me.pre_pass){ |
939 | CHECK_CLIPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16, | |
940 | (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16) | |
941 | CHECK_CLIPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, | |
942 | (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) | |
943 | }else{ | |
944 | CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16, | |
945 | (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16) | |
946 | CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, | |
947 | (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) | |
948 | } | |
1457ab52 | 949 | } |
b07a5980 MN |
950 | |
951 | if(s->avctx->last_predictor_count){ | |
952 | const int count= s->avctx->last_predictor_count; | |
953 | const int xstart= FFMAX(0, s->mb_x - count); | |
954 | const int ystart= FFMAX(0, s->mb_y - count); | |
955 | const int xend= FFMIN(s->mb_width , s->mb_x + count + 1); | |
956 | const int yend= FFMIN(s->mb_height, s->mb_y + count + 1); | |
957 | int mb_y; | |
958 | ||
959 | for(mb_y=ystart; mb_y<yend; mb_y++){ | |
960 | int mb_x; | |
961 | for(mb_x=xstart; mb_x<xend; mb_x++){ | |
962 | const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride; | |
963 | int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16; | |
964 | int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16; | |
965 | ||
966 | if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue; | |
967 | CHECK_MV(mx,my) | |
1457ab52 MN |
968 | } |
969 | } | |
970 | } | |
b07a5980 | 971 | |
1457ab52 | 972 | //check(best[0],best[1],0, b0) |
826f429a | 973 | if(s->me.dia_size==-1) |
bb198e19 MN |
974 | dmin= RENAME(funny_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, |
975 | pred_x, pred_y, penalty_factor, | |
976 | shift, map, map_generation, size, h, mv_penalty); | |
826f429a | 977 | else if(s->me.dia_size<-1) |
bb198e19 MN |
978 | dmin= RENAME(sab_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, |
979 | pred_x, pred_y, penalty_factor, | |
980 | shift, map, map_generation, size, h, mv_penalty); | |
826f429a | 981 | else if(s->me.dia_size<2) |
bb198e19 MN |
982 | dmin= RENAME(small_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, |
983 | pred_x, pred_y, penalty_factor, | |
984 | shift, map, map_generation, size, h, mv_penalty); | |
1457ab52 | 985 | else |
bb198e19 MN |
986 | dmin= RENAME(var_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, |
987 | pred_x, pred_y, penalty_factor, | |
988 | shift, map, map_generation, size, h, mv_penalty); | |
1457ab52 MN |
989 | |
990 | //check(best[0],best[1],0, b1) | |
991 | *mx_ptr= best[0]; | |
992 | *my_ptr= best[1]; | |
993 | ||
994 | // printf("%d %d %d \n", best[0], best[1], dmin); | |
995 | return dmin; | |
996 | } | |
997 | ||
998 | #ifndef CMP_DIRECT /* no 4mv search needed in direct mode */ | |
bb198e19 | 999 | static int RENAME(epzs_motion_search4)(MpegEncContext * s, |
1457ab52 MN |
1000 | int *mx_ptr, int *my_ptr, |
1001 | int P[10][2], int pred_x, int pred_y, | |
bb198e19 MN |
1002 | uint8_t *src_data[3], |
1003 | uint8_t *ref_data[3], int stride, int uvstride, int16_t (*last_mv)[2], | |
30952237 | 1004 | int ref_mv_scale, uint8_t * const mv_penalty) |
1457ab52 MN |
1005 | { |
1006 | int best[2]={0, 0}; | |
1007 | int d, dmin; | |
1008 | const int shift= 1+s->quarter_sample; | |
1009 | uint32_t *map= s->me.map; | |
1010 | int map_generation; | |
1011 | const int penalty_factor= s->me.penalty_factor; | |
1012 | const int size=1; | |
bb198e19 | 1013 | const int h=8; |
7bc9090a MN |
1014 | const int ref_mv_stride= s->mb_stride; |
1015 | const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride; | |
1457ab52 | 1016 | me_cmp_func cmp, chroma_cmp; |
bb198e19 | 1017 | LOAD_COMMON |
1457ab52 MN |
1018 | |
1019 | cmp= s->dsp.me_cmp[size]; | |
1020 | chroma_cmp= s->dsp.me_cmp[size+1]; | |
1021 | ||
1022 | map_generation= update_map_generation(s); | |
1023 | ||
1024 | dmin = 1000000; | |
1025 | //printf("%d %d %d %d //",xmin, ymin, xmax, ymax); | |
1026 | /* first line */ | |
bb198e19 | 1027 | if (s->mb_y == 0/* && block<2*/) { |
1457ab52 | 1028 | CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) |
b07a5980 MN |
1029 | CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, |
1030 | (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) | |
1457ab52 MN |
1031 | CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift) |
1032 | }else{ | |
1033 | CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift) | |
1034 | //FIXME try some early stop | |
1035 | if(dmin>64*2){ | |
1036 | CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift) | |
1037 | CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) | |
1038 | CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift) | |
1039 | CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift) | |
b07a5980 MN |
1040 | CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, |
1041 | (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) | |
1457ab52 MN |
1042 | } |
1043 | } | |
1044 | if(dmin>64*4){ | |
b07a5980 MN |
1045 | CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16, |
1046 | (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16) | |
1047 | CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, | |
1048 | (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) | |
1457ab52 MN |
1049 | } |
1050 | ||
826f429a | 1051 | if(s->me.dia_size==-1) |
bb198e19 MN |
1052 | dmin= RENAME(funny_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, |
1053 | pred_x, pred_y, penalty_factor, | |
1054 | shift, map, map_generation, size, h, mv_penalty); | |
826f429a | 1055 | else if(s->me.dia_size<-1) |
bb198e19 MN |
1056 | dmin= RENAME(sab_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, |
1057 | pred_x, pred_y, penalty_factor, | |
1058 | shift, map, map_generation, size, h, mv_penalty); | |
826f429a | 1059 | else if(s->me.dia_size<2) |
bb198e19 MN |
1060 | dmin= RENAME(small_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, |
1061 | pred_x, pred_y, penalty_factor, | |
1062 | shift, map, map_generation, size, h, mv_penalty); | |
1457ab52 | 1063 | else |
bb198e19 MN |
1064 | dmin= RENAME(var_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, |
1065 | pred_x, pred_y, penalty_factor, | |
1066 | shift, map, map_generation, size, h, mv_penalty); | |
1067 | ||
1068 | ||
1069 | *mx_ptr= best[0]; | |
1070 | *my_ptr= best[1]; | |
1071 | ||
1072 | // printf("%d %d %d \n", best[0], best[1], dmin); | |
1073 | return dmin; | |
1074 | } | |
1075 | ||
1076 | //try to merge with above FIXME (needs PSNR test) | |
1077 | static int RENAME(epzs_motion_search2)(MpegEncContext * s, | |
1078 | int *mx_ptr, int *my_ptr, | |
1079 | int P[10][2], int pred_x, int pred_y, | |
1080 | uint8_t *src_data[3], | |
1081 | uint8_t *ref_data[3], int stride, int uvstride, int16_t (*last_mv)[2], | |
1082 | int ref_mv_scale, uint8_t * const mv_penalty) | |
1083 | { | |
1084 | int best[2]={0, 0}; | |
1085 | int d, dmin; | |
1086 | const int shift= 1+s->quarter_sample; | |
1087 | uint32_t *map= s->me.map; | |
1088 | int map_generation; | |
1089 | const int penalty_factor= s->me.penalty_factor; | |
1090 | const int size=0; //FIXME pass as arg | |
1091 | const int h=8; | |
1092 | const int ref_mv_stride= s->mb_stride; | |
1093 | const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride; | |
1094 | me_cmp_func cmp, chroma_cmp; | |
1095 | LOAD_COMMON | |
1096 | ||
1097 | cmp= s->dsp.me_cmp[size]; | |
1098 | chroma_cmp= s->dsp.me_cmp[size+1]; | |
1099 | ||
1100 | map_generation= update_map_generation(s); | |
1101 | ||
1102 | dmin = 1000000; | |
1103 | //printf("%d %d %d %d //",xmin, ymin, xmax, ymax); | |
1104 | /* first line */ | |
1105 | if (s->mb_y == 0) { | |
1106 | CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) | |
1107 | CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, | |
1108 | (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) | |
1109 | CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift) | |
1110 | }else{ | |
1111 | CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift) | |
1112 | //FIXME try some early stop | |
1113 | if(dmin>64*2){ | |
1114 | CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift) | |
1115 | CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) | |
1116 | CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift) | |
1117 | CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift) | |
1118 | CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, | |
1119 | (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) | |
1120 | } | |
1121 | } | |
1122 | if(dmin>64*4){ | |
1123 | CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16, | |
1124 | (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16) | |
1125 | CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, | |
1126 | (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) | |
1127 | } | |
1128 | ||
1129 | if(s->me.dia_size==-1) | |
1130 | dmin= RENAME(funny_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, | |
1131 | pred_x, pred_y, penalty_factor, | |
1132 | shift, map, map_generation, size, h, mv_penalty); | |
1133 | else if(s->me.dia_size<-1) | |
1134 | dmin= RENAME(sab_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, | |
1135 | pred_x, pred_y, penalty_factor, | |
1136 | shift, map, map_generation, size, h, mv_penalty); | |
1137 | else if(s->me.dia_size<2) | |
1138 | dmin= RENAME(small_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, | |
1139 | pred_x, pred_y, penalty_factor, | |
1140 | shift, map, map_generation, size, h, mv_penalty); | |
1141 | else | |
1142 | dmin= RENAME(var_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, | |
1143 | pred_x, pred_y, penalty_factor, | |
1144 | shift, map, map_generation, size, h, mv_penalty); | |
1145 | ||
b07a5980 | 1146 | |
1457ab52 MN |
1147 | *mx_ptr= best[0]; |
1148 | *my_ptr= best[1]; | |
1149 | ||
1150 | // printf("%d %d %d \n", best[0], best[1], dmin); | |
1151 | return dmin; | |
1152 | } | |
1153 | #endif /* !CMP_DIRECT */ |