Explicitly include fastmemcpy.h from libvo/.
[libav.git] / libavcodec / imgresample.c
CommitLineData
/*
 * High quality image resampling with polyphase filters
 * Copyright (c) 2001 Fabrice Bellard.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
115329f1 19
/**
 * @file imgresample.c
 * High quality image resampling with polyphase filters.
 */
115329f1 24
de6d9b64 25#include "avcodec.h"
7b748aff 26#include "swscale.h"
6000abfa 27#include "dsputil.h"
de6d9b64 28
54329dd5 29#ifdef USE_FASTMEMCPY
f4bd289a 30#include "libvo/fastmemcpy.h"
54329dd5 31#endif
54329dd5 32
de6d9b64
FB
/* Tuning constants of the polyphase resampler. */
#define NB_COMPONENTS 3

#define PHASE_BITS 4                    /* log2 of the number of filter phases */
#define NB_PHASES  (1 << PHASE_BITS)
#define NB_TAPS    4                    /* coefficients per filter */
#define FCENTER    1  /* index of the center of the filter */
//#define TEST 1  /* Test it */

#define POS_FRAC_BITS 16                /* fractional bits of a source position */
#define POS_FRAC      (1 << POS_FRAC_BITS)
/* 6 bits precision is needed for MMX */
#define FILTER_BITS   8                 /* fractional bits of a filter coefficient */

#define LINE_BUF_HEIGHT (NB_TAPS * 4)
47
48struct ImgReSampleContext {
1ff93ffc
TK
49 int iwidth, iheight, owidth, oheight;
50 int topBand, bottomBand, leftBand, rightBand;
51 int padtop, padbottom, padleft, padright;
52 int pad_owidth, pad_oheight;
de6d9b64 53 int h_incr, v_incr;
68b51e58
SH
54 DECLARE_ALIGNED_8(int16_t, h_filters[NB_PHASES][NB_TAPS]); /* horizontal filters */
55 DECLARE_ALIGNED_8(int16_t, v_filters[NB_PHASES][NB_TAPS]); /* vertical filters */
0c1a9eda 56 uint8_t *line_buf;
de6d9b64
FB
57};
58
aaaf1635
MN
/*
 * Filter bank builder, defined outside this file.  Here it is called with
 * tap_count = NB_TAPS, phase_count = NB_PHASES, scale = 1 << FILTER_BITS
 * and type = 0.
 */
void av_build_filter(int16_t *filter, double factor, int tap_count, int phase_count, int scale, int type);
60
de6d9b64
FB
61static inline int get_phase(int pos)
62{
63 return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
64}
65
66/* This function must be optimized */
da64ecc3 67static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
bb270c08
DB
68 int src_width, int src_start, int src_incr,
69 int16_t *filters)
de6d9b64
FB
70{
71 int src_pos, phase, sum, i;
da64ecc3 72 const uint8_t *s;
0c1a9eda 73 int16_t *filter;
de6d9b64
FB
74
75 src_pos = src_start;
76 for(i=0;i<dst_width;i++) {
77#ifdef TEST
78 /* test */
79 if ((src_pos >> POS_FRAC_BITS) < 0 ||
80 (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
02ac3136 81 av_abort();
de6d9b64
FB
82#endif
83 s = src + (src_pos >> POS_FRAC_BITS);
84 phase = get_phase(src_pos);
85 filter = filters + phase * NB_TAPS;
86#if NB_TAPS == 4
87 sum = s[0] * filter[0] +
88 s[1] * filter[1] +
89 s[2] * filter[2] +
90 s[3] * filter[3];
91#else
92 {
93 int j;
94 sum = 0;
95 for(j=0;j<NB_TAPS;j++)
96 sum += s[j] * filter[j];
97 }
98#endif
99 sum = sum >> FILTER_BITS;
100 if (sum < 0)
101 sum = 0;
102 else if (sum > 255)
103 sum = 255;
104 dst[0] = sum;
105 src_pos += src_incr;
106 dst++;
107 }
108}
109
110/* This function must be optimized */
da64ecc3 111static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
bb270c08 112 int wrap, int16_t *filter)
de6d9b64
FB
113{
114 int sum, i;
da64ecc3 115 const uint8_t *s;
de6d9b64
FB
116
117 s = src;
118 for(i=0;i<dst_width;i++) {
119#if NB_TAPS == 4
120 sum = s[0 * wrap] * filter[0] +
121 s[1 * wrap] * filter[1] +
122 s[2 * wrap] * filter[2] +
123 s[3 * wrap] * filter[3];
124#else
125 {
126 int j;
0c1a9eda 127 uint8_t *s1 = s;
de6d9b64
FB
128
129 sum = 0;
130 for(j=0;j<NB_TAPS;j++) {
131 sum += s1[0] * filter[j];
132 s1 += wrap;
133 }
134 }
135#endif
136 sum = sum >> FILTER_BITS;
137 if (sum < 0)
138 sum = 0;
139 else if (sum > 255)
140 sum = 255;
141 dst[0] = sum;
142 dst++;
143 s++;
144 }
145}
146
980fc7b8 147#ifdef HAVE_MMX
de6d9b64
FB
148
149#include "i386/mmx.h"
150
/*
 * Filter one output pixel into the low dword of 'reg' and advance src_pos.
 * Expects src, src_pos, src_incr, filters, s, phase and filter in the
 * calling scope, mm7 to be zero, and clobbers mm6.
 * NOTE(review): movq loads 8 bytes from s although only NB_TAPS (4) are
 * used - confirm the source buffers tolerate the extra read.
 */
#define FILTER4(reg) \
do {\
    s = src + (src_pos >> POS_FRAC_BITS);\
    phase = get_phase(src_pos);\
    filter = filters + phase * NB_TAPS;\
    movq_m2r(*s, reg);\
    punpcklbw_r2r(mm7, reg);\
    movq_m2r(*filter, mm6);\
    pmaddwd_r2r(reg, mm6);\
    movq_r2r(mm6, reg);\
    psrlq_i2r(32, reg);\
    paddd_r2r(mm6, reg);\
    psrad_i2r(FILTER_BITS, reg);\
    src_pos += src_incr;\
} while (0)

/* Debug helper: print an MMX register; needs an mmx_t named tmp in scope. */
#define DUMP(reg) \
do {\
    movq_r2m(reg, tmp);\
    printf(#reg "=%016llx\n", (unsigned long long)tmp.uq);\
} while (0)
168
169/* XXX: do four pixels at a time */
da64ecc3 170static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
bb270c08 171 const uint8_t *src, int src_width,
0c1a9eda 172 int src_start, int src_incr, int16_t *filters)
de6d9b64
FB
173{
174 int src_pos, phase;
da64ecc3 175 const uint8_t *s;
0c1a9eda 176 int16_t *filter;
de6d9b64 177 mmx_t tmp;
115329f1 178
de6d9b64
FB
179 src_pos = src_start;
180 pxor_r2r(mm7, mm7);
181
182 while (dst_width >= 4) {
183
184 FILTER4(mm0);
185 FILTER4(mm1);
186 FILTER4(mm2);
187 FILTER4(mm3);
188
189 packuswb_r2r(mm7, mm0);
190 packuswb_r2r(mm7, mm1);
191 packuswb_r2r(mm7, mm3);
192 packuswb_r2r(mm7, mm2);
193 movq_r2m(mm0, tmp);
194 dst[0] = tmp.ub[0];
195 movq_r2m(mm1, tmp);
196 dst[1] = tmp.ub[0];
197 movq_r2m(mm2, tmp);
198 dst[2] = tmp.ub[0];
199 movq_r2m(mm3, tmp);
200 dst[3] = tmp.ub[0];
201 dst += 4;
202 dst_width -= 4;
203 }
204 while (dst_width > 0) {
205 FILTER4(mm0);
206 packuswb_r2r(mm7, mm0);
207 movq_r2m(mm0, tmp);
208 dst[0] = tmp.ub[0];
209 dst++;
210 dst_width--;
211 }
212 emms();
213}
214
da64ecc3 215static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
bb270c08 216 int wrap, int16_t *filter)
de6d9b64
FB
217{
218 int sum, i, v;
da64ecc3 219 const uint8_t *s;
de6d9b64
FB
220 mmx_t tmp;
221 mmx_t coefs[4];
115329f1 222
de6d9b64
FB
223 for(i=0;i<4;i++) {
224 v = filter[i];
225 coefs[i].uw[0] = v;
226 coefs[i].uw[1] = v;
227 coefs[i].uw[2] = v;
228 coefs[i].uw[3] = v;
229 }
115329f1 230
de6d9b64
FB
231 pxor_r2r(mm7, mm7);
232 s = src;
233 while (dst_width >= 4) {
234 movq_m2r(s[0 * wrap], mm0);
235 punpcklbw_r2r(mm7, mm0);
236 movq_m2r(s[1 * wrap], mm1);
237 punpcklbw_r2r(mm7, mm1);
238 movq_m2r(s[2 * wrap], mm2);
239 punpcklbw_r2r(mm7, mm2);
240 movq_m2r(s[3 * wrap], mm3);
241 punpcklbw_r2r(mm7, mm3);
242
243 pmullw_m2r(coefs[0], mm0);
244 pmullw_m2r(coefs[1], mm1);
245 pmullw_m2r(coefs[2], mm2);
246 pmullw_m2r(coefs[3], mm3);
247
248 paddw_r2r(mm1, mm0);
249 paddw_r2r(mm3, mm2);
250 paddw_r2r(mm2, mm0);
251 psraw_i2r(FILTER_BITS, mm0);
115329f1 252
de6d9b64
FB
253 packuswb_r2r(mm7, mm0);
254 movq_r2m(mm0, tmp);
255
0c1a9eda 256 *(uint32_t *)dst = tmp.ud[0];
de6d9b64
FB
257 dst += 4;
258 s += 4;
259 dst_width -= 4;
260 }
261 while (dst_width > 0) {
262 sum = s[0 * wrap] * filter[0] +
263 s[1 * wrap] * filter[1] +
264 s[2 * wrap] * filter[2] +
265 s[3 * wrap] * filter[3];
266 sum = sum >> FILTER_BITS;
267 if (sum < 0)
268 sum = 0;
269 else if (sum > 255)
270 sum = 255;
271 dst[0] = sum;
272 dst++;
273 s++;
274 dst_width--;
275 }
276 emms();
277}
278#endif
279
404d2241 280#ifdef HAVE_ALTIVEC
bb270c08 281typedef union {
404d2241
BF
282 vector unsigned char v;
283 unsigned char c[16];
284} vec_uc_t;
285
bb270c08 286typedef union {
404d2241
BF
287 vector signed short v;
288 signed short s[8];
289} vec_ss_t;
290
da64ecc3 291void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
bb270c08 292 int wrap, int16_t *filter)
404d2241
BF
293{
294 int sum, i;
da64ecc3 295 const uint8_t *s;
404d2241
BF
296 vector unsigned char *tv, tmp, dstv, zero;
297 vec_ss_t srchv[4], srclv[4], fv[4];
115329f1 298 vector signed short zeros, sumhv, sumlv;
404d2241
BF
299 s = src;
300
301 for(i=0;i<4;i++)
302 {
303 /*
304 The vec_madds later on does an implicit >>15 on the result.
305 Since FILTER_BITS is 8, and we have 15 bits of magnitude in
306 a signed short, we have just enough bits to pre-shift our
307 filter constants <<7 to compensate for vec_madds.
308 */
309 fv[i].s[0] = filter[i] << (15-FILTER_BITS);
310 fv[i].v = vec_splat(fv[i].v, 0);
311 }
115329f1 312
404d2241
BF
313 zero = vec_splat_u8(0);
314 zeros = vec_splat_s16(0);
315
316
317 /*
318 When we're resampling, we'd ideally like both our input buffers,
319 and output buffers to be 16-byte aligned, so we can do both aligned
320 reads and writes. Sadly we can't always have this at the moment, so
321 we opt for aligned writes, as unaligned writes have a huge overhead.
322 To do this, do enough scalar resamples to get dst 16-byte aligned.
323 */
9e4e1659 324 i = (-(int)dst) & 0xf;
404d2241
BF
325 while(i>0) {
326 sum = s[0 * wrap] * filter[0] +
327 s[1 * wrap] * filter[1] +
328 s[2 * wrap] * filter[2] +
329 s[3 * wrap] * filter[3];
330 sum = sum >> FILTER_BITS;
331 if (sum<0) sum = 0; else if (sum>255) sum=255;
332 dst[0] = sum;
333 dst++;
334 s++;
335 dst_width--;
336 i--;
337 }
115329f1 338
404d2241
BF
339 /* Do our altivec resampling on 16 pixels at once. */
340 while(dst_width>=16) {
341 /*
342 Read 16 (potentially unaligned) bytes from each of
343 4 lines into 4 vectors, and split them into shorts.
344 Interleave the multipy/accumulate for the resample
345 filter with the loads to hide the 3 cycle latency
346 the vec_madds have.
347 */
348 tv = (vector unsigned char *) &s[0 * wrap];
349 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
350 srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
351 srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
352 sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
353 sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);
354
355 tv = (vector unsigned char *) &s[1 * wrap];
356 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
357 srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
358 srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
359 sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
360 sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);
361
362 tv = (vector unsigned char *) &s[2 * wrap];
363 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
364 srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
365 srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
366 sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
367 sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);
368
369 tv = (vector unsigned char *) &s[3 * wrap];
370 tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
371 srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
372 srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
373 sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
374 sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);
115329f1 375
404d2241
BF
376 /*
377 Pack the results into our destination vector,
378 and do an aligned write of that back to memory.
379 */
380 dstv = vec_packsu(sumhv, sumlv) ;
381 vec_st(dstv, 0, (vector unsigned char *) dst);
115329f1 382
404d2241
BF
383 dst+=16;
384 s+=16;
385 dst_width-=16;
386 }
387
388 /*
389 If there are any leftover pixels, resample them
390 with the slow scalar method.
391 */
392 while(dst_width>0) {
393 sum = s[0 * wrap] * filter[0] +
394 s[1 * wrap] * filter[1] +
395 s[2 * wrap] * filter[2] +
396 s[3 * wrap] * filter[3];
397 sum = sum >> FILTER_BITS;
398 if (sum<0) sum = 0; else if (sum>255) sum=255;
399 dst[0] = sum;
400 dst++;
401 s++;
402 dst_width--;
403 }
404}
405#endif
406
de6d9b64 407/* slow version to handle limit cases. Does not need optimisation */
da64ecc3 408static void h_resample_slow(uint8_t *dst, int dst_width,
bb270c08 409 const uint8_t *src, int src_width,
0c1a9eda 410 int src_start, int src_incr, int16_t *filters)
de6d9b64
FB
411{
412 int src_pos, phase, sum, j, v, i;
da64ecc3 413 const uint8_t *s, *src_end;
0c1a9eda 414 int16_t *filter;
de6d9b64
FB
415
416 src_end = src + src_width;
417 src_pos = src_start;
418 for(i=0;i<dst_width;i++) {
419 s = src + (src_pos >> POS_FRAC_BITS);
420 phase = get_phase(src_pos);
421 filter = filters + phase * NB_TAPS;
422 sum = 0;
423 for(j=0;j<NB_TAPS;j++) {
424 if (s < src)
425 v = src[0];
426 else if (s >= src_end)
427 v = src_end[-1];
428 else
429 v = s[0];
430 sum += v * filter[j];
431 s++;
432 }
433 sum = sum >> FILTER_BITS;
434 if (sum < 0)
435 sum = 0;
436 else if (sum > 255)
437 sum = 255;
438 dst[0] = sum;
439 src_pos += src_incr;
440 dst++;
441 }
442}
443
da64ecc3 444static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
bb270c08
DB
445 int src_width, int src_start, int src_incr,
446 int16_t *filters)
de6d9b64
FB
447{
448 int n, src_end;
449
450 if (src_start < 0) {
451 n = (0 - src_start + src_incr - 1) / src_incr;
452 h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
453 dst += n;
454 dst_width -= n;
455 src_start += n * src_incr;
456 }
457 src_end = src_start + dst_width * src_incr;
458 if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
115329f1 459 n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
de6d9b64
FB
460 src_incr;
461 } else {
462 n = dst_width;
463 }
980fc7b8 464#ifdef HAVE_MMX
de6d9b64 465 if ((mm_flags & MM_MMX) && NB_TAPS == 4)
115329f1 466 h_resample_fast4_mmx(dst, n,
de6d9b64
FB
467 src, src_width, src_start, src_incr, filters);
468 else
469#endif
115329f1 470 h_resample_fast(dst, n,
de6d9b64
FB
471 src, src_width, src_start, src_incr, filters);
472 if (n < dst_width) {
473 dst += n;
474 dst_width -= n;
475 src_start += n * src_incr;
115329f1 476 h_resample_slow(dst, dst_width,
de6d9b64
FB
477 src, src_width, src_start, src_incr, filters);
478 }
479}
480
115329f1 481static void component_resample(ImgReSampleContext *s,
0c1a9eda
ZK
482 uint8_t *output, int owrap, int owidth, int oheight,
483 uint8_t *input, int iwrap, int iwidth, int iheight)
de6d9b64
FB
484{
485 int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
0c1a9eda 486 uint8_t *new_line, *src_line;
de6d9b64
FB
487
488 last_src_y = - FCENTER - 1;
489 /* position of the bottom of the filter in the source image */
115329f1 490 src_y = (last_src_y + NB_TAPS) * POS_FRAC;
de6d9b64
FB
491 ring_y = NB_TAPS; /* position in ring buffer */
492 for(y=0;y<oheight;y++) {
493 /* apply horizontal filter on new lines from input if needed */
494 src_y1 = src_y >> POS_FRAC_BITS;
495 while (last_src_y < src_y1) {
496 if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
497 ring_y = NB_TAPS;
498 last_src_y++;
ab6d194a
MN
499 /* handle limit conditions : replicate line (slightly
500 inefficient because we filter multiple times) */
de6d9b64
FB
501 y1 = last_src_y;
502 if (y1 < 0) {
503 y1 = 0;
504 } else if (y1 >= iheight) {
505 y1 = iheight - 1;
506 }
507 src_line = input + y1 * iwrap;
508 new_line = s->line_buf + ring_y * owidth;
509 /* apply filter and handle limit cases correctly */
115329f1
DB
510 h_resample(new_line, owidth,
511 src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
de6d9b64
FB
512 &s->h_filters[0][0]);
513 /* handle ring buffer wraping */
514 if (ring_y >= LINE_BUF_HEIGHT) {
515 memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
516 new_line, owidth);
517 }
518 }
519 /* apply vertical filter */
520 phase_y = get_phase(src_y);
980fc7b8 521#ifdef HAVE_MMX
de6d9b64
FB
522 /* desactivated MMX because loss of precision */
523 if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
115329f1
DB
524 v_resample4_mmx(output, owidth,
525 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
de6d9b64 526 &s->v_filters[phase_y][0]);
404d2241
BF
527 else
528#endif
529#ifdef HAVE_ALTIVEC
00a7d8d6 530 if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
404d2241
BF
531 v_resample16_altivec(output, owidth,
532 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
533 &s->v_filters[phase_y][0]);
de6d9b64
FB
534 else
535#endif
115329f1
DB
536 v_resample(output, owidth,
537 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
de6d9b64 538 &s->v_filters[phase_y][0]);
115329f1 539
de6d9b64 540 src_y += s->v_incr;
115329f1 541
de6d9b64
FB
542 output += owrap;
543 }
544}
545
de6d9b64
FB
546ImgReSampleContext *img_resample_init(int owidth, int oheight,
547 int iwidth, int iheight)
548{
115329f1 549 return img_resample_full_init(owidth, oheight, iwidth, iheight,
1ff93ffc 550 0, 0, 0, 0, 0, 0, 0, 0);
ab6d194a
MN
551}
552
553ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
554 int iwidth, int iheight,
555 int topBand, int bottomBand,
1ff93ffc
TK
556 int leftBand, int rightBand,
557 int padtop, int padbottom,
558 int padleft, int padright)
ab6d194a 559{
de6d9b64
FB
560 ImgReSampleContext *s;
561
d10dc616 562 if (!owidth || !oheight || !iwidth || !iheight)
bb270c08 563 return NULL;
d10dc616 564
de6d9b64
FB
565 s = av_mallocz(sizeof(ImgReSampleContext));
566 if (!s)
567 return NULL;
0ecca7a4
MN
568 if((unsigned)owidth >= UINT_MAX / (LINE_BUF_HEIGHT + NB_TAPS))
569 return NULL;
de6d9b64 570 s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
115329f1 571 if (!s->line_buf)
de6d9b64 572 goto fail;
115329f1 573
de6d9b64
FB
574 s->owidth = owidth;
575 s->oheight = oheight;
576 s->iwidth = iwidth;
577 s->iheight = iheight;
115329f1 578
ab6d194a
MN
579 s->topBand = topBand;
580 s->bottomBand = bottomBand;
581 s->leftBand = leftBand;
582 s->rightBand = rightBand;
115329f1 583
1ff93ffc
TK
584 s->padtop = padtop;
585 s->padbottom = padbottom;
586 s->padleft = padleft;
587 s->padright = padright;
588
589 s->pad_owidth = owidth - (padleft + padright);
590 s->pad_oheight = oheight - (padtop + padbottom);
591
592 s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth;
115329f1 593 s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight;
1ff93ffc 594
115329f1 595 av_build_filter(&s->h_filters[0][0], (float) s->pad_owidth /
aaaf1635 596 (float) (iwidth - leftBand - rightBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
115329f1 597 av_build_filter(&s->v_filters[0][0], (float) s->pad_oheight /
aaaf1635 598 (float) (iheight - topBand - bottomBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
de6d9b64
FB
599
600 return s;
1ff93ffc 601fail:
6000abfa 602 av_free(s);
de6d9b64
FB
603 return NULL;
604}
605
115329f1 606void img_resample(ImgReSampleContext *s,
da64ecc3 607 AVPicture *output, const AVPicture *input)
de6d9b64
FB
608{
609 int i, shift;
1ff93ffc 610 uint8_t* optr;
de6d9b64 611
1ff93ffc 612 for (i=0;i<3;i++) {
de6d9b64 613 shift = (i == 0) ? 0 : 1;
1ff93ffc 614
115329f1 615 optr = output->data[i] + (((output->linesize[i] *
1ff93ffc
TK
616 s->padtop) + s->padleft) >> shift);
617
115329f1 618 component_resample(s, optr, output->linesize[i],
1ff93ffc 619 s->pad_owidth >> shift, s->pad_oheight >> shift,
115329f1 620 input->data[i] + (input->linesize[i] *
1ff93ffc 621 (s->topBand >> shift)) + (s->leftBand >> shift),
115329f1 622 input->linesize[i], ((s->iwidth - s->leftBand -
1ff93ffc 623 s->rightBand) >> shift),
ab6d194a 624 (s->iheight - s->topBand - s->bottomBand) >> shift);
de6d9b64
FB
625 }
626}
627
628void img_resample_close(ImgReSampleContext *s)
629{
6000abfa
FB
630 av_free(s->line_buf);
631 av_free(s);
de6d9b64
FB
632}
633
7b748aff
LA
634struct SwsContext *sws_getContext(int srcW, int srcH, int srcFormat,
635 int dstW, int dstH, int dstFormat,
636 int flags, SwsFilter *srcFilter,
637 SwsFilter *dstFilter, double *param)
638{
639 struct SwsContext *ctx;
640
641 ctx = av_malloc(sizeof(struct SwsContext));
642 if (ctx == NULL) {
643 av_log(NULL, AV_LOG_ERROR, "Cannot allocate a resampling context!\n");
644
645 return NULL;
646 }
647
648 if ((srcH != dstH) || (srcW != dstW)) {
649 if ((srcFormat != PIX_FMT_YUV420P) || (dstFormat != PIX_FMT_YUV420P)) {
650 av_log(NULL, AV_LOG_INFO, "PIX_FMT_YUV420P will be used as an intermediate format for rescaling\n");
651 }
652 ctx->resampling_ctx = img_resample_init(dstW, dstH, srcW, srcH);
653 } else {
654 ctx->resampling_ctx = av_malloc(sizeof(ImgReSampleContext));
655 ctx->resampling_ctx->iheight = srcH;
656 ctx->resampling_ctx->iwidth = srcW;
657 ctx->resampling_ctx->oheight = dstH;
658 ctx->resampling_ctx->owidth = dstW;
659 }
660 ctx->src_pix_fmt = srcFormat;
661 ctx->dst_pix_fmt = dstFormat;
662
663 return ctx;
664}
665
666void sws_freeContext(struct SwsContext *ctx)
667{
668 if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
669 (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
670 img_resample_close(ctx->resampling_ctx);
671 } else {
672 av_free(ctx->resampling_ctx);
673 }
674 av_free(ctx);
675}
676
677int sws_scale(struct SwsContext *ctx, uint8_t* src[], int srcStride[],
678 int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
679{
680 AVPicture src_pict, dst_pict;
681 int i, res = 0;
682 AVPicture picture_format_temp;
683 AVPicture picture_resample_temp, *formatted_picture, *resampled_picture;
684 uint8_t *buf1 = NULL, *buf2 = NULL;
685 enum PixelFormat current_pix_fmt;
686
687 for (i = 0; i < 3; i++) {
688 src_pict.data[i] = src[i];
689 src_pict.linesize[i] = srcStride[i];
690 dst_pict.data[i] = dst[i];
691 dst_pict.linesize[i] = dstStride[i];
692 }
693 if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
694 (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
695 /* We have to rescale the picture, but only YUV420P rescaling is supported... */
696
697 if (ctx->src_pix_fmt != PIX_FMT_YUV420P) {
698 int size;
699
700 /* create temporary picture for rescaling input*/
701 size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
702 buf1 = av_malloc(size);
703 if (!buf1) {
704 res = -1;
705 goto the_end;
706 }
707 formatted_picture = &picture_format_temp;
708 avpicture_fill((AVPicture*)formatted_picture, buf1,
709 PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
710
711 if (img_convert((AVPicture*)formatted_picture, PIX_FMT_YUV420P,
712 &src_pict, ctx->src_pix_fmt,
713 ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight) < 0) {
714
715 av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
716 res = -1;
717 goto the_end;
718 }
719 } else {
720 formatted_picture = &src_pict;
721 }
722
723 if (ctx->dst_pix_fmt != PIX_FMT_YUV420P) {
724 int size;
725
726 /* create temporary picture for rescaling output*/
727 size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
728 buf2 = av_malloc(size);
729 if (!buf2) {
730 res = -1;
731 goto the_end;
732 }
733 resampled_picture = &picture_resample_temp;
734 avpicture_fill((AVPicture*)resampled_picture, buf2,
735 PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
736
737 } else {
738 resampled_picture = &dst_pict;
739 }
740
741 /* ...and finally rescale!!! */
742 img_resample(ctx->resampling_ctx, resampled_picture, formatted_picture);
743 current_pix_fmt = PIX_FMT_YUV420P;
744 } else {
745 resampled_picture = &src_pict;
746 current_pix_fmt = ctx->src_pix_fmt;
747 }
748
749 if (current_pix_fmt != ctx->dst_pix_fmt) {
750 if (img_convert(&dst_pict, ctx->dst_pix_fmt,
751 resampled_picture, current_pix_fmt,
752 ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight) < 0) {
753
754 av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
755
756 res = -1;
757 goto the_end;
758 }
2793096f
LA
759 } else if (resampled_picture != &dst_pict) {
760 img_copy(&dst_pict, resampled_picture, current_pix_fmt,
761 ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
7b748aff
LA
762 }
763
764the_end:
765 av_free(buf1);
766 av_free(buf2);
767 return res;
768}
769
770
de6d9b64 771#ifdef TEST
13160c07 772#include <stdio.h>
ab6d194a 773
de6d9b64
FB
/* input: one 8-bit plane of XSIZE x YSIZE pixels */
#define XSIZE 256
#define YSIZE 256
uint8_t img[XSIZE * YSIZE];

/* output: two planes large enough for the biggest scale factor tested (2.0) */
#define XSIZE1 512
#define YSIZE1 512
uint8_t img1[XSIZE1 * YSIZE1];
uint8_t img2[XSIZE1 * YSIZE1];
de6d9b64 784
/**
 * Write an 8-bit grayscale image as a binary PGM ("P5") file.
 *
 * @param filename  path of the file to create
 * @param img       xsize * ysize pixels, stored line by line
 * @param xsize     image width
 * @param ysize     image height
 *
 * If the file cannot be opened a message is printed on stderr and nothing
 * is written.
 */
void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
{
#undef fprintf
    FILE *f;

    /* "wb": the pixel payload is binary and must not be newline-translated */
    f = fopen(filename, "wb");
    if (!f) {
        fprintf(stderr, "Cannot open %s\n", filename);
        return;
    }
    fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
    fwrite(img, 1, xsize * ysize, f);
    fclose(f);
#define fprintf please_use_av_log
}
795
0c1a9eda 796static void dump_filter(int16_t *filter)
de6d9b64
FB
797{
798 int i, ph;
799
800 for(ph=0;ph<NB_PHASES;ph++) {
13160c07 801 av_log(NULL, AV_LOG_INFO, "%2d: ", ph);
de6d9b64 802 for(i=0;i<NB_TAPS;i++) {
13160c07 803 av_log(NULL, AV_LOG_INFO, " %5.2f", filter[ph * NB_TAPS + i] / 256.0);
de6d9b64 804 }
13160c07 805 av_log(NULL, AV_LOG_INFO, "\n");
de6d9b64
FB
806 }
807}
808
980fc7b8 809#ifdef HAVE_MMX
6acce86b 810int mm_flags;
de6d9b64
FB
811#endif
812
813int main(int argc, char **argv)
814{
815 int x, y, v, i, xsize, ysize;
816 ImgReSampleContext *s;
817 float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
818 char buf[256];
819
820 /* build test image */
821 for(y=0;y<YSIZE;y++) {
822 for(x=0;x<XSIZE;x++) {
823 if (x < XSIZE/2 && y < YSIZE/2) {
824 if (x < XSIZE/4 && y < YSIZE/4) {
825 if ((x % 10) <= 6 &&
826 (y % 10) <= 6)
827 v = 0xff;
828 else
829 v = 0x00;
830 } else if (x < XSIZE/4) {
115329f1 831 if (x & 1)
de6d9b64 832 v = 0xff;
115329f1 833 else
de6d9b64
FB
834 v = 0;
835 } else if (y < XSIZE/4) {
115329f1 836 if (y & 1)
de6d9b64 837 v = 0xff;
115329f1 838 else
de6d9b64
FB
839 v = 0;
840 } else {
841 if (y < YSIZE*3/8) {
115329f1 842 if ((y+x) & 1)
de6d9b64 843 v = 0xff;
115329f1 844 else
de6d9b64
FB
845 v = 0;
846 } else {
847 if (((x+3) % 4) <= 1 &&
848 ((y+3) % 4) <= 1)
849 v = 0xff;
850 else
851 v = 0x00;
852 }
853 }
854 } else if (x < XSIZE/2) {
855 v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
856 } else if (y < XSIZE/2) {
857 v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
858 } else {
859 v = ((x + y - XSIZE) * 255) / XSIZE;
860 }
ab6d194a 861 img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v;
de6d9b64
FB
862 }
863 }
864 save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
865 for(i=0;i<sizeof(factors)/sizeof(float);i++) {
866 fact = factors[i];
867 xsize = (int)(XSIZE * fact);
ab6d194a 868 ysize = (int)((YSIZE - 100) * fact);
13160c07
PI
869 s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0, 0, 0, 0, 0);
870 av_log(NULL, AV_LOG_INFO, "Factor=%0.2f\n", fact);
de6d9b64
FB
871 dump_filter(&s->h_filters[0][0]);
872 component_resample(s, img1, xsize, xsize, ysize,
ab6d194a 873 img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
de6d9b64
FB
874 img_resample_close(s);
875
2fc8ea24 876 snprintf(buf, sizeof(buf), "/tmp/out%d.pgm", i);
de6d9b64
FB
877 save_pgm(buf, img1, xsize, ysize);
878 }
879
880 /* mmx test */
980fc7b8 881#ifdef HAVE_MMX
13160c07 882 av_log(NULL, AV_LOG_INFO, "MMX test\n");
de6d9b64
FB
883 fact = 0.72;
884 xsize = (int)(XSIZE * fact);
885 ysize = (int)(YSIZE * fact);
886 mm_flags = MM_MMX;
887 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
888 component_resample(s, img1, xsize, xsize, ysize,
889 img, XSIZE, XSIZE, YSIZE);
890
891 mm_flags = 0;
892 s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
893 component_resample(s, img2, xsize, xsize, ysize,
894 img, XSIZE, XSIZE, YSIZE);
895 if (memcmp(img1, img2, xsize * ysize) != 0) {
13160c07 896 av_log(NULL, AV_LOG_ERROR, "mmx error\n");
de6d9b64
FB
897 exit(1);
898 }
13160c07 899 av_log(NULL, AV_LOG_INFO, "MMX OK\n");
de6d9b64
FB
900#endif
901 return 0;
902}
903
904#endif