2 * Alpha optimized DSP utils
3 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 #include "../dsputil.h"
22 #include "../mpegvideo.h"
/*
 * Judging by its name, the H.263 intra dequantizer for Alpha.  The loop
 * below advances block by four elements per iteration and works on
 * uint64_t values, i.e. presumably four 16-bit coefficients packed in one
 * quadword -- TODO confirm against DCTELEM.
 * NOTE(review): this listing is incomplete.  The remaining parameters, the
 * braces, the local declarations and several statements (among them the
 * assignments to qmul and levels and any store back to block) are not
 * visible here, so the notes below cover only what is shown.
 */
24 static void dct_unquantize_h263_intra_axp(MpegEncContext
*s
, DCTELEM
*block
,
/* Keep the start of the block: block itself is advanced by the loop, and
   element 0 is written through orig_block at the very end. */
30 DCTELEM
*orig_block
= block
;
/* (qscale - 1) | 1 sets the low bit, so the value is always odd.
   WORD_VEC presumably replicates it into every subword -- TODO confirm. */
33 qadd
= WORD_VEC((qscale
- 1) | 1);
35 /* This mask kills spill from negative subwords to the next subword. */
/* Arithmetically (qmul - 1) + 1 is just qmul; the split presumably mirrors
   the two spill sources named in the trailing remark. */
36 correction
= WORD_VEC((qmul
- 1) + 1); /* multiplication / addition */
/* Element 0 scaled by y_dc_scale or by c_dc_scale.  The condition that
   selects between the two assignments is not visible in this listing. */
40 block0
= block
[0] * s
->y_dc_scale
;
42 block0
= block
[0] * s
->c_dc_scale
;
/* Fixed upper bound: the loop below covers indices 0..63. */
46 n_coeffs
= 63; // does not always use zigzag table
/* Four elements per iteration: block and i both step by 4. */
48 for(i
= 0; i
<= n_coeffs
; block
+= 4, i
+= 4) {
49 uint64_t levels
, negmask
, zeros
, add
;
/* Two ways of building negmask follow (maxsw4/minsw4, then cmpbge/zap).
   As listed the second overwrites the first, so presumably a preprocessor
   conditional selecting one of them is missing here -- TODO confirm.
   Per the existing remarks, negmask is ffff in negative subwords and
   0000 in positive ones. */
56 /* I don't think the speed difference justifies runtime
58 negmask
= maxsw4(levels
, -1); /* negative -> ffff (-1) */
59 negmask
= minsw4(negmask
, 0); /* positive -> 0000 (0) */
61 negmask
= cmpbge(WORD_VEC(0x7fff), levels
);
62 negmask
&= (negmask
>> 1) | (1 << 7);
63 negmask
= zap(-1, negmask
);
/* Byte mask consumed by zap() further down to clear add where levels is
   zero, as the existing remarks state. */
66 zeros
= cmpbge(0, levels
);
68 /* zeros |= zeros << 1 is not needed since qadd <= 255, so
69 zapping the lower byte suffices. */
/* negmask << 16 moves each subword mask up by 16 bits, so correction is
   subtracted only from a subword whose lower neighbour is negative. */
72 levels
-= correction
& (negmask
<< 16);
74 /* Negate qadd for negative levels. */
/* NOTE(review): the first assignment to add is not visible in this
   listing; the inter variant in this file has add = qadd ^ negmask at the
   same spot.  XOR with an all-ones mask followed by adding 1 is a
   negation in twos complement arithmetic. */
76 add
+= WORD_VEC(0x0001) & negmask
;
77 /* Set qadd to 0 for levels == 0. */
78 add
= zap(add
, zeros
);
/* Only when mb_intra is set and h263_aic is not: overwrite element 0 of
   the original block with the separately scaled block0. */
85 if (s
->mb_intra
&& !s
->h263_aic
)
86 orig_block
[0] = block0
;
/*
 * Judging by its name, the H.263 inter dequantizer for Alpha.  The loop
 * below advances block by four elements per iteration and works on
 * uint64_t values, i.e. presumably four 16-bit coefficients packed in one
 * quadword -- TODO confirm against DCTELEM.
 * NOTE(review): this listing is incomplete.  The remaining parameters
 * (n and qscale are used but not declared in view), the braces, the local
 * declarations and several statements (among them the assignments to qmul
 * and levels and any store back to block) are not visible here, so the
 * notes below cover only what is shown.
 */
89 static void dct_unquantize_h263_inter_axp(MpegEncContext
*s
, DCTELEM
*block
,
/* Copy of the initial block pointer.  No use of orig_block is visible in
   the part of this function shown here. */
95 DCTELEM
*orig_block
= block
;
/* (qscale - 1) | 1 sets the low bit, so the value is always odd.
   WORD_VEC presumably replicates it into every subword -- TODO confirm. */
98 qadd
= WORD_VEC((qscale
- 1) | 1);
100 /* This mask kills spill from negative subwords to the next subword. */
/* Arithmetically (qmul - 1) + 1 is just qmul; the split presumably mirrors
   the two spill sources named in the trailing remark. */
101 correction
= WORD_VEC((qmul
- 1) + 1); /* multiplication / addition */
/* Upper loop bound looked up in raster_end using the last index recorded
   for block n.
   NOTE(review): an inter routine reading intra_scantable looks unusual --
   confirm this is intended. */
103 n_coeffs
= s
->intra_scantable
.raster_end
[s
->block_last_index
[n
]];
/* Four elements per iteration: block and i both step by 4. */
105 for(i
= 0; i
<= n_coeffs
; block
+= 4, i
+= 4) {
106 uint64_t levels
, negmask
, zeros
, add
;
/* Two ways of building negmask follow (maxsw4/minsw4, then cmpbge/zap).
   As listed the second overwrites the first, so presumably a preprocessor
   conditional selecting one of them is missing here -- TODO confirm.
   Per the existing remarks, negmask is ffff in negative subwords and
   0000 in positive ones. */
113 /* I don't think the speed difference justifies runtime
115 negmask
= maxsw4(levels
, -1); /* negative -> ffff (-1) */
116 negmask
= minsw4(negmask
, 0); /* positive -> 0000 (0) */
118 negmask
= cmpbge(WORD_VEC(0x7fff), levels
);
119 negmask
&= (negmask
>> 1) | (1 << 7);
120 negmask
= zap(-1, negmask
);
/* Byte mask consumed by zap() further down to clear add where levels is
   zero, as the existing remarks state. */
123 zeros
= cmpbge(0, levels
);
125 /* zeros |= zeros << 1 is not needed since qadd <= 255, so
126 zapping the lower byte suffices. */
/* negmask << 16 moves each subword mask up by 16 bits, so correction is
   subtracted only from a subword whose lower neighbour is negative. */
129 levels
-= correction
& (negmask
<< 16);
131 /* Negate qadd for negative levels. */
/* XOR with the all-ones mask and then adding 1 in the same subwords is a
   negation in twos complement arithmetic; subwords with a zero mask keep
   qadd unchanged. */
132 add
= qadd
^ negmask
;
133 add
+= WORD_VEC(0x0001) & negmask
;
134 /* Set qadd to 0 for levels == 0. */
135 add
= zap(add
, zeros
);
/*
 * Stores the two Alpha routines defined in this file into the
 * dct_unquantize_h263_intra and dct_unquantize_h263_inter members of s.
 * NOTE(review): the braces of the function body are not visible in this
 * listing.
 */
143 void MPV_common_init_axp(MpegEncContext
*s
)
145 s
->dct_unquantize_h263_intra
= dct_unquantize_h263_intra_axp
;
146 s
->dct_unquantize_h263_inter
= dct_unquantize_h263_inter_axp
;