added emms_c() macro which can be used in C code in both the MMX and non-MMX cases
[libav.git] / libavcodec / motion_est.c
CommitLineData
de6d9b64
FB
1/*
2 * Motion estimation
3 * Copyright (c) 2000,2001 Gerard Lantau.
4 *
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20#include <stdlib.h>
21#include <stdio.h>
22#include "avcodec.h"
23#include "dsputil.h"
24#include "mpegvideo.h"
25
26static void halfpel_motion_search(MpegEncContext * s,
27 int *mx_ptr, int *my_ptr, int dmin,
28 int xmin, int ymin, int xmax, int ymax);
29
30/* config it to test motion vector encoding (send random vectors) */
31//#define CONFIG_TEST_MV_ENCODE
32
33static int pix_sum(UINT8 * pix, int line_size)
34{
35 int s, i, j;
36
37 s = 0;
38 for (i = 0; i < 16; i++) {
39 for (j = 0; j < 16; j += 8) {
40 s += pix[0];
41 s += pix[1];
42 s += pix[2];
43 s += pix[3];
44 s += pix[4];
45 s += pix[5];
46 s += pix[6];
47 s += pix[7];
48 pix += 8;
49 }
50 pix += line_size - 16;
51 }
52 return s;
53}
54
55static int pix_norm1(UINT8 * pix, int line_size)
56{
57 int s, i, j;
58 UINT32 *sq = squareTbl + 256;
59
60 s = 0;
61 for (i = 0; i < 16; i++) {
62 for (j = 0; j < 16; j += 8) {
63 s += sq[pix[0]];
64 s += sq[pix[1]];
65 s += sq[pix[2]];
66 s += sq[pix[3]];
67 s += sq[pix[4]];
68 s += sq[pix[5]];
69 s += sq[pix[6]];
70 s += sq[pix[7]];
71 pix += 8;
72 }
73 pix += line_size - 16;
74 }
75 return s;
76}
77
78static int pix_norm(UINT8 * pix1, UINT8 * pix2, int line_size)
79{
80 int s, i, j;
81 UINT32 *sq = squareTbl + 256;
82
83 s = 0;
84 for (i = 0; i < 16; i++) {
85 for (j = 0; j < 16; j += 8) {
86 s += sq[pix1[0] - pix2[0]];
87 s += sq[pix1[1] - pix2[1]];
88 s += sq[pix1[2] - pix2[2]];
89 s += sq[pix1[3] - pix2[3]];
90 s += sq[pix1[4] - pix2[4]];
91 s += sq[pix1[5] - pix2[5]];
92 s += sq[pix1[6] - pix2[6]];
93 s += sq[pix1[7] - pix2[7]];
94 pix1 += 8;
95 pix2 += 8;
96 }
97 pix1 += line_size - 16;
98 pix2 += line_size - 16;
99 }
100 return s;
101}
102
103static void no_motion_search(MpegEncContext * s,
104 int *mx_ptr, int *my_ptr)
105{
106 *mx_ptr = 16 * s->mb_x;
107 *my_ptr = 16 * s->mb_y;
108}
109
110static int full_motion_search(MpegEncContext * s,
111 int *mx_ptr, int *my_ptr, int range,
112 int xmin, int ymin, int xmax, int ymax)
113{
114 int x1, y1, x2, y2, xx, yy, x, y;
115 int mx, my, dmin, d;
116 UINT8 *pix;
117
118 xx = 16 * s->mb_x;
119 yy = 16 * s->mb_y;
120 x1 = xx - range + 1; /* we loose one pixel to avoid boundary pb with half pixel pred */
121 if (x1 < xmin)
122 x1 = xmin;
123 x2 = xx + range - 1;
124 if (x2 > xmax)
125 x2 = xmax;
126 y1 = yy - range + 1;
127 if (y1 < ymin)
128 y1 = ymin;
129 y2 = yy + range - 1;
130 if (y2 > ymax)
131 y2 = ymax;
132 pix = s->new_picture[0] + (yy * s->linesize) + xx;
133 dmin = 0x7fffffff;
134 mx = 0;
135 my = 0;
136 for (y = y1; y <= y2; y++) {
137 for (x = x1; x <= x2; x++) {
138 d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x,
139 s->linesize, 16);
140 if (d < dmin ||
141 (d == dmin &&
142 (abs(x - xx) + abs(y - yy)) <
143 (abs(mx - xx) + abs(my - yy)))) {
144 dmin = d;
145 mx = x;
146 my = y;
147 }
148 }
149 }
150
151 *mx_ptr = mx;
152 *my_ptr = my;
153
154#if 0
155 if (*mx_ptr < -(2 * range) || *mx_ptr >= (2 * range) ||
156 *my_ptr < -(2 * range) || *my_ptr >= (2 * range)) {
157 fprintf(stderr, "error %d %d\n", *mx_ptr, *my_ptr);
158 }
159#endif
160 return dmin;
161}
162
163
164static int log_motion_search(MpegEncContext * s,
165 int *mx_ptr, int *my_ptr, int range,
166 int xmin, int ymin, int xmax, int ymax)
167{
168 int x1, y1, x2, y2, xx, yy, x, y;
169 int mx, my, dmin, d;
170 UINT8 *pix;
171
172 xx = s->mb_x << 4;
173 yy = s->mb_y << 4;
174
175 /* Left limit */
176 x1 = xx - range;
177 if (x1 < xmin)
178 x1 = xmin;
179
180 /* Right limit */
181 x2 = xx + range;
182 if (x2 > xmax)
183 x2 = xmax;
184
185 /* Upper limit */
186 y1 = yy - range;
187 if (y1 < ymin)
188 y1 = ymin;
189
190 /* Lower limit */
191 y2 = yy + range;
192 if (y2 > ymax)
193 y2 = ymax;
194
195 pix = s->new_picture[0] + (yy * s->linesize) + xx;
196 dmin = 0x7fffffff;
197 mx = 0;
198 my = 0;
199
200 do {
201 for (y = y1; y <= y2; y += range) {
202 for (x = x1; x <= x2; x += range) {
203 d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize, 16);
204 if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
205 dmin = d;
206 mx = x;
207 my = y;
208 }
209 }
210 }
211
212 range = range >> 1;
213
214 x1 = mx - range;
215 if (x1 < xmin)
216 x1 = xmin;
217
218 x2 = mx + range;
219 if (x2 > xmax)
220 x2 = xmax;
221
222 y1 = my - range;
223 if (y1 < ymin)
224 y1 = ymin;
225
226 y2 = my + range;
227 if (y2 > ymax)
228 y2 = ymax;
229
230 } while (range >= 1);
231
232#ifdef DEBUG
233 fprintf(stderr, "log - MX: %d\tMY: %d\n", mx, my);
234#endif
235 *mx_ptr = mx;
236 *my_ptr = my;
237 return dmin;
238}
239
240static int phods_motion_search(MpegEncContext * s,
241 int *mx_ptr, int *my_ptr, int range,
242 int xmin, int ymin, int xmax, int ymax)
243{
244 int x1, y1, x2, y2, xx, yy, x, y, lastx, d;
245 int mx, my, dminx, dminy;
246 UINT8 *pix;
247
248 xx = s->mb_x << 4;
249 yy = s->mb_y << 4;
250
251 /* Left limit */
252 x1 = xx - range;
253 if (x1 < xmin)
254 x1 = xmin;
255
256 /* Right limit */
257 x2 = xx + range;
258 if (x2 > xmax)
259 x2 = xmax;
260
261 /* Upper limit */
262 y1 = yy - range;
263 if (y1 < ymin)
264 y1 = ymin;
265
266 /* Lower limit */
267 y2 = yy + range;
268 if (y2 > ymax)
269 y2 = ymax;
270
271 pix = s->new_picture[0] + (yy * s->linesize) + xx;
272 mx = 0;
273 my = 0;
274
275 x = xx;
276 y = yy;
277 do {
278 dminx = 0x7fffffff;
279 dminy = 0x7fffffff;
280
281 lastx = x;
282 for (x = x1; x <= x2; x += range) {
283 d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize, 16);
284 if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
285 dminx = d;
286 mx = x;
287 }
288 }
289
290 x = lastx;
291 for (y = y1; y <= y2; y += range) {
292 d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize, 16);
293 if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
294 dminy = d;
295 my = y;
296 }
297 }
298
299 range = range >> 1;
300
301 x = mx;
302 y = my;
303 x1 = mx - range;
304 if (x1 < xmin)
305 x1 = xmin;
306
307 x2 = mx + range;
308 if (x2 > xmax)
309 x2 = xmax;
310
311 y1 = my - range;
312 if (y1 < ymin)
313 y1 = ymin;
314
315 y2 = my + range;
316 if (y2 > ymax)
317 y2 = ymax;
318
319 } while (range >= 1);
320
321#ifdef DEBUG
322 fprintf(stderr, "phods - MX: %d\tMY: %d\n", mx, my);
323#endif
324
325 /* half pixel search */
326 *mx_ptr = mx;
327 *my_ptr = my;
328 return dminy;
329}
330
331/* The idea would be to make half pel ME after Inter/Intra decision to
332 save time. */
333static void halfpel_motion_search(MpegEncContext * s,
334 int *mx_ptr, int *my_ptr, int dmin,
335 int xmin, int ymin, int xmax, int ymax)
336{
337 int mx, my, mx1, my1, d, xx, yy, dminh;
338 UINT8 *pix;
339
340 mx = *mx_ptr << 1;
341 my = *my_ptr << 1;
342
343 xx = 16 * s->mb_x;
344 yy = 16 * s->mb_y;
345
346 dminh = dmin;
347
348 /* Half pixel search */
349 mx1 = mx;
350 my1 = my;
351
352 pix = s->new_picture[0] + (yy * s->linesize) + xx;
353
354 if ((mx > (xmin << 1)) && mx < (xmax << 1) &&
355 (my > (ymin << 1)) && my < (ymax << 1)) {
356 int dx, dy, px, py;
357 UINT8 *ptr;
358 for (dy = -1; dy <= 1; dy++) {
359 for (dx = -1; dx <= 1; dx++) {
360 if (dx != 0 || dy != 0) {
361 px = mx1 + dx;
362 py = my1 + dy;
363 ptr = s->last_picture[0] + ((py >> 1) * s->linesize) + (px >> 1);
364 switch (((py & 1) << 1) | (px & 1)) {
365 default:
366 case 0:
367 d = pix_abs16x16(pix, ptr, s->linesize, 16);
368 break;
369 case 1:
370 d = pix_abs16x16_x2(pix, ptr, s->linesize, 16);
371 break;
372 case 2:
373 d = pix_abs16x16_y2(pix, ptr, s->linesize, 16);
374 break;
375 case 3:
376 d = pix_abs16x16_xy2(pix, ptr, s->linesize, 16);
377 break;
378 }
379 if (d < dminh) {
380 dminh = d;
381 mx = px;
382 my = py;
383 }
384 }
385 }
386 }
387 }
388
389 *mx_ptr = mx - (xx << 1);
390 *my_ptr = my - (yy << 1);
391 //fprintf(stderr,"half - MX: %d\tMY: %d\n",*mx_ptr ,*my_ptr);
392}
393
394#ifndef CONFIG_TEST_MV_ENCODE
395
396int estimate_motion(MpegEncContext * s,
397 int mb_x, int mb_y,
398 int *mx_ptr, int *my_ptr)
399{
400 UINT8 *pix, *ppix;
401 int sum, varc, vard, mx, my, range, dmin, xx, yy;
402 int xmin, ymin, xmax, ymax;
403
404 range = 8 * (1 << (s->f_code - 1));
405 /* XXX: temporary kludge to avoid overflow for msmpeg4 */
406 if (s->out_format == FMT_H263 && !s->h263_msmpeg4)
407 range = range * 2;
408
409 if (s->unrestricted_mv) {
410 xmin = -16;
411 ymin = -16;
412 xmax = s->width;
413 ymax = s->height;
414 } else {
415 xmin = 0;
416 ymin = 0;
417 xmax = s->width - 16;
418 ymax = s->height - 16;
419 }
420
421 switch(s->full_search) {
422 case ME_ZERO:
423 default:
424 no_motion_search(s, &mx, &my);
425 dmin = 0;
426 break;
427 case ME_FULL:
428 dmin = full_motion_search(s, &mx, &my, range, xmin, ymin, xmax, ymax);
429 break;
430 case ME_LOG:
431 dmin = log_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax);
432 break;
433 case ME_PHODS:
434 dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax);
435 break;
436 }
fb16b7e7 437 emms_c();
de6d9b64
FB
438
439 /* intra / predictive decision */
440 xx = mb_x * 16;
441 yy = mb_y * 16;
442
443 pix = s->new_picture[0] + (yy * s->linesize) + xx;
444 /* At this point (mx,my) are full-pell and the absolute displacement */
445 ppix = s->last_picture[0] + (my * s->linesize) + mx;
446
447 sum = pix_sum(pix, s->linesize);
448 varc = pix_norm1(pix, s->linesize);
449 vard = pix_norm(pix, ppix, s->linesize);
450
451 vard = vard >> 8;
452 sum = sum >> 8;
453 varc = (varc >> 8) - (sum * sum);
454#if 0
455 printf("varc=%d (sum=%d) vard=%d mx=%d my=%d\n",
456 varc, sum, vard, mx - xx, my - yy);
457#endif
458 if (vard <= 64 || vard < varc) {
459 if (s->full_search != ME_ZERO) {
460 halfpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax);
461 } else {
462 mx -= 16 * s->mb_x;
463 my -= 16 * s->mb_y;
464 }
465 *mx_ptr = mx;
466 *my_ptr = my;
467 return 0;
468 } else {
469 *mx_ptr = 0;
470 *my_ptr = 0;
471 return 1;
472 }
473}
474
475#else
476
477/* test version which generates valid random vectors */
478int estimate_motion(MpegEncContext * s,
479 int mb_x, int mb_y,
480 int *mx_ptr, int *my_ptr)
481{
482 int xx, yy, x1, y1, x2, y2, range;
483
484 if ((random() % 10) >= 5) {
485 range = 8 * (1 << (s->f_code - 1));
486 if (s->out_format == FMT_H263 && !s->h263_msmpeg4)
487 range = range * 2;
488
489 xx = 16 * s->mb_x;
490 yy = 16 * s->mb_y;
491 x1 = xx - range;
492 if (x1 < 0)
493 x1 = 0;
494 x2 = xx + range - 1;
495 if (x2 > (s->width - 16))
496 x2 = s->width - 16;
497 y1 = yy - range;
498 if (y1 < 0)
499 y1 = 0;
500 y2 = yy + range - 1;
501 if (y2 > (s->height - 16))
502 y2 = s->height - 16;
503
504 *mx_ptr = (random() % (2 * (x2 - x1 + 1))) + 2 * (x1 - xx);
505 *my_ptr = (random() % (2 * (y2 - y1 + 1))) + 2 * (y1 - yy);
506 return 0;
507 } else {
508 *mx_ptr = 0;
509 *my_ptr = 0;
510 return 1;
511 }
512}
513
514#endif