Add ARCH_X86_32 as a new define for 32 bit x86 architectures and change
[libav.git] / libavcodec / cabac.h
CommitLineData
d592f67f
MN
1/*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4 *
b78e7197
DB
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
d592f67f
MN
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
b78e7197 10 * version 2.1 of the License, or (at your option) any later version.
d592f67f 11 *
b78e7197 12 * FFmpeg is distributed in the hope that it will be useful,
d592f67f
MN
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
b78e7197 18 * License along with FFmpeg; if not, write to the Free Software
5509bffa 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
d592f67f
MN
20 *
21 */
115329f1 22
d592f67f
MN
23/**
24 * @file cabac.h
25 * Context Adaptive Binary Arithmetic Coder.
26 */
27
28
//#undef NDEBUG
#include <assert.h>

/* A 64 bit x86 build also takes the code paths keyed on ARCH_X86 below. */
#ifdef ARCH_X86_64
#define ARCH_X86
#endif

#ifdef ARCH_X86
#include "x86_cpu.h"
#endif

/* 16 selects the two-byte refill; the #else paths in refill()/refill2() read one byte. */
#define CABAC_BITS 16
/* Mask of the low CABAC_BITS bits of CABACContext.low; a refill is triggered when they are all zero. */
#define CABAC_MASK ((1 << CABAC_BITS) - 1)
/* When defined, get_cabac_inline() is built in its branchless form. */
#define BRANCHLESS_CABAC_DECODER 1
/* Uncomment to build the experimental asm in renorm_cabac_decoder_once(). */
//#define ARCH_X86_DISABLED 1
d592f67f
MN
43typedef struct CABACContext{
44 int low;
45 int range;
46 int outstanding_count;
47#ifdef STRICT_LIMITS
48 int symCount;
49#endif
e96682e6
MN
50 const uint8_t *bytestream_start;
51 const uint8_t *bytestream;
bba83349 52 const uint8_t *bytestream_end;
d592f67f
MN
53 PutBitContext pb;
54}CABACContext;
55
/* Lookup tables shared by the coder routines; they are defined outside this header. */
extern uint8_t ff_h264_mlps_state[4 * 64];      ///< merged state transitions; the branchless decoder reads it at offset +128 with an index that may be negative
extern uint8_t ff_h264_lps_range[4 * 2 * 64];   ///< rangeTabLPS
extern uint8_t ff_h264_mps_state[2 * 64];       ///< transIdxMPS
extern uint8_t ff_h264_lps_state[2 * 64];       ///< transIdxLPS
extern const uint8_t ff_h264_norm_shift[512];   ///< renormalization shift, indexed by a range value
ec7eb896 61
d592f67f
MN
62
63void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size);
e96682e6 64void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size);
d61c4e73 65void ff_init_cabac_states(CABACContext *c);
d592f67f
MN
66
67
68static inline void put_cabac_bit(CABACContext *c, int b){
115329f1
DB
69 put_bits(&c->pb, 1, b);
70 for(;c->outstanding_count; c->outstanding_count--){
d592f67f
MN
71 put_bits(&c->pb, 1, 1-b);
72 }
73}
74
75static inline void renorm_cabac_encoder(CABACContext *c){
76 while(c->range < 0x100){
77 //FIXME optimize
78 if(c->low<0x100){
79 put_cabac_bit(c, 0);
80 }else if(c->low<0x200){
81 c->outstanding_count++;
82 c->low -= 0x100;
83 }else{
84 put_cabac_bit(c, 1);
85 c->low -= 0x200;
86 }
115329f1 87
d592f67f
MN
88 c->range+= c->range;
89 c->low += c->low;
90 }
91}
92
938dd846 93static void put_cabac(CABACContext *c, uint8_t * const state, int bit){
f24a5159 94 int RangeLPS= ff_h264_lps_range[2*(c->range&0xC0) + *state];
115329f1 95
d592f67f
MN
96 if(bit == ((*state)&1)){
97 c->range -= RangeLPS;
d61c4e73 98 *state= ff_h264_mps_state[*state];
d592f67f
MN
99 }else{
100 c->low += c->range - RangeLPS;
101 c->range = RangeLPS;
d61c4e73 102 *state= ff_h264_lps_state[*state];
d592f67f 103 }
115329f1 104
d592f67f
MN
105 renorm_cabac_encoder(c);
106
107#ifdef STRICT_LIMITS
108 c->symCount++;
109#endif
110}
111
938dd846 112static void put_cabac_static(CABACContext *c, int RangeLPS, int bit){
d592f67f
MN
113 assert(c->range > RangeLPS);
114
115 if(!bit){
116 c->range -= RangeLPS;
117 }else{
118 c->low += c->range - RangeLPS;
119 c->range = RangeLPS;
120 }
121
122 renorm_cabac_encoder(c);
123
124#ifdef STRICT_LIMITS
125 c->symCount++;
126#endif
127}
128
61ccfcc0
MN
129/**
130 * @param bit 0 -> write zero bit, !=0 write one bit
131 */
938dd846 132static void put_cabac_bypass(CABACContext *c, int bit){
d592f67f
MN
133 c->low += c->low;
134
135 if(bit){
136 c->low += c->range;
137 }
138//FIXME optimize
139 if(c->low<0x200){
140 put_cabac_bit(c, 0);
141 }else if(c->low<0x400){
142 c->outstanding_count++;
143 c->low -= 0x200;
144 }else{
145 put_cabac_bit(c, 1);
146 c->low -= 0x400;
147 }
115329f1 148
d592f67f
MN
149#ifdef STRICT_LIMITS
150 c->symCount++;
151#endif
152}
153
5e20f836
MN
154/**
155 *
156 * @return the number of bytes written
157 */
938dd846 158static int put_cabac_terminate(CABACContext *c, int bit){
d592f67f
MN
159 c->range -= 2;
160
161 if(!bit){
162 renorm_cabac_encoder(c);
163 }else{
164 c->low += c->range;
165 c->range= 2;
115329f1 166
d592f67f
MN
167 renorm_cabac_encoder(c);
168
169 assert(c->low <= 0x1FF);
170 put_cabac_bit(c, c->low>>9);
171 put_bits(&c->pb, 2, ((c->low>>7)&3)|1);
115329f1 172
d592f67f
MN
173 flush_put_bits(&c->pb); //FIXME FIXME FIXME XXX wrong
174 }
115329f1 175
d592f67f
MN
176#ifdef STRICT_LIMITS
177 c->symCount++;
178#endif
5e20f836 179
b46243ed 180 return (put_bits_count(&c->pb)+7)>>3;
d592f67f
MN
181}
182
61ccfcc0
MN
183/**
184 * put (truncated) unary binarization.
185 */
938dd846 186static void put_cabac_u(CABACContext *c, uint8_t * state, int v, int max, int max_index, int truncated){
61ccfcc0 187 int i;
115329f1 188
61ccfcc0 189 assert(v <= max);
115329f1 190
61ccfcc0
MN
191#if 1
192 for(i=0; i<v; i++){
193 put_cabac(c, state, 1);
194 if(i < max_index) state++;
195 }
196 if(truncated==0 || v<max)
197 put_cabac(c, state, 0);
198#else
199 if(v <= max_index){
200 for(i=0; i<v; i++){
201 put_cabac(c, state+i, 1);
202 }
203 if(truncated==0 || v<max)
204 put_cabac(c, state+i, 0);
205 }else{
206 for(i=0; i<=max_index; i++){
207 put_cabac(c, state+i, 1);
208 }
209 for(; i<v; i++){
210 put_cabac(c, state+max_index, 1);
211 }
212 if(truncated==0 || v<max)
213 put_cabac(c, state+max_index, 0);
214 }
215#endif
216}
217
218/**
219 * put unary exp golomb k-th order binarization.
220 */
938dd846 221static void put_cabac_ueg(CABACContext *c, uint8_t * state, int v, int max, int is_signed, int k, int max_index){
61ccfcc0 222 int i;
115329f1 223
61ccfcc0
MN
224 if(v==0)
225 put_cabac(c, state, 0);
226 else{
8f8c0800 227 const int sign= v < 0;
115329f1 228
c26abfa5 229 if(is_signed) v= FFABS(v);
115329f1 230
61ccfcc0
MN
231 if(v<max){
232 for(i=0; i<v; i++){
233 put_cabac(c, state, 1);
234 if(i < max_index) state++;
235 }
236
237 put_cabac(c, state, 0);
238 }else{
239 int m= 1<<k;
240
241 for(i=0; i<max; i++){
242 put_cabac(c, state, 1);
243 if(i < max_index) state++;
244 }
245
246 v -= max;
247 while(v >= m){ //FIXME optimize
248 put_cabac_bypass(c, 1);
249 v-= m;
250 m+= m;
251 }
252 put_cabac_bypass(c, 0);
253 while(m>>=1){
254 put_cabac_bypass(c, v&m);
255 }
256 }
257
258 if(is_signed)
259 put_cabac_bypass(c, sign);
260 }
261}
262
ec7eb896 263static void refill(CABACContext *c){
ec7eb896 264#if CABAC_BITS == 16
2ae7569d 265 c->low+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
ec7eb896
MN
266#else
267 c->low+= c->bytestream[0]<<1;
268#endif
269 c->low -= CABAC_MASK;
270 c->bytestream+= CABAC_BITS/8;
271}
272
273static void refill2(CABACContext *c){
274 int i, x;
275
276 x= c->low ^ (c->low-1);
f24a5159 277 i= 7 - ff_h264_norm_shift[x>>(CABAC_BITS-1)];
ec7eb896
MN
278
279 x= -CABAC_MASK;
115329f1 280
ec7eb896
MN
281#if CABAC_BITS == 16
282 x+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
283#else
284 x+= c->bytestream[0]<<1;
285#endif
115329f1 286
ec7eb896
MN
287 c->low += x<<i;
288 c->bytestream+= CABAC_BITS/8;
289}
ec7eb896 290
d592f67f 291static inline void renorm_cabac_decoder(CABACContext *c){
f24a5159 292 while(c->range < 0x100){
d592f67f
MN
293 c->range+= c->range;
294 c->low+= c->low;
ec7eb896
MN
295 if(!(c->low & CABAC_MASK))
296 refill(c);
d592f67f
MN
297 }
298}
299
ec7eb896 300static inline void renorm_cabac_decoder_once(CABACContext *c){
400d0f8e 301#ifdef ARCH_X86_DISABLED
ec8f483a
MN
302 int temp;
303#if 0
4310580d 304 //P3:683 athlon:475
ec8f483a 305 asm(
f24a5159 306 "lea -0x100(%0), %2 \n\t"
ec8f483a
MN
307 "shr $31, %2 \n\t" //FIXME 31->63 for x86-64
308 "shl %%cl, %0 \n\t"
309 "shl %%cl, %1 \n\t"
310 : "+r"(c->range), "+r"(c->low), "+c"(temp)
311 );
312#elif 0
4310580d 313 //P3:680 athlon:474
ec8f483a 314 asm(
f24a5159 315 "cmp $0x100, %0 \n\t"
ec8f483a
MN
316 "setb %%cl \n\t" //FIXME 31->63 for x86-64
317 "shl %%cl, %0 \n\t"
318 "shl %%cl, %1 \n\t"
319 : "+r"(c->range), "+r"(c->low), "+c"(temp)
320 );
321#elif 1
322 int temp2;
4310580d 323 //P3:665 athlon:517
ec8f483a 324 asm(
f24a5159 325 "lea -0x100(%0), %%eax \n\t"
ec8f483a
MN
326 "cdq \n\t"
327 "mov %0, %%eax \n\t"
328 "and %%edx, %0 \n\t"
329 "and %1, %%edx \n\t"
330 "add %%eax, %0 \n\t"
331 "add %%edx, %1 \n\t"
332 : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
333 );
334#elif 0
335 int temp2;
4310580d 336 //P3:673 athlon:509
ec8f483a 337 asm(
f24a5159 338 "cmp $0x100, %0 \n\t"
ec8f483a
MN
339 "sbb %%edx, %%edx \n\t"
340 "mov %0, %%eax \n\t"
341 "and %%edx, %0 \n\t"
342 "and %1, %%edx \n\t"
343 "add %%eax, %0 \n\t"
344 "add %%edx, %1 \n\t"
345 : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
346 );
347#else
348 int temp2;
4310580d 349 //P3:677 athlon:511
ec8f483a 350 asm(
f24a5159 351 "cmp $0x100, %0 \n\t"
ec8f483a
MN
352 "lea (%0, %0), %%eax \n\t"
353 "lea (%1, %1), %%edx \n\t"
354 "cmovb %%eax, %0 \n\t"
355 "cmovb %%edx, %1 \n\t"
356 : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
357 );
358#endif
359#else
4310580d 360 //P3:675 athlon:476
f24a5159 361 int shift= (uint32_t)(c->range - 0x100)>>31;
bfe328ca
LM
362 c->range<<= shift;
363 c->low <<= shift;
ec8f483a 364#endif
ec7eb896
MN
365 if(!(c->low & CABAC_MASK))
366 refill(c);
367}
368
851ded89 369static int always_inline get_cabac_inline(CABACContext *c, uint8_t * const state){
bfe328ca 370 //FIXME gcc generates duplicate load/stores for c->low and c->range
f7d0b683
MN
371#define LOW "0"
372#define RANGE "4"
755073fe
RD
373#ifdef ARCH_X86_64
374#define BYTESTART "16"
375#define BYTE "24"
376#define BYTEEND "32"
377#else
d61c4e73
MN
378#define BYTESTART "12"
379#define BYTE "16"
380#define BYTEEND "20"
755073fe 381#endif
419b8784 382#if defined(ARCH_X86_32) && !(defined(PIC) && defined(__GNUC__))
ba9fb5da
BR
383 int bit;
384
0bc2e7f0 385#ifndef BRANCHLESS_CABAC_DECODER
f7d0b683 386 asm volatile(
4041a495 387 "movzbl (%1), %0 \n\t"
f7d0b683
MN
388 "movl "RANGE "(%2), %%ebx \n\t"
389 "movl "RANGE "(%2), %%edx \n\t"
f24a5159 390 "andl $0xC0, %%ebx \n\t"
4041a495 391 "movzbl "MANGLE(ff_h264_lps_range)"(%0, %%ebx, 2), %%esi\n\t"
f7d0b683
MN
392 "movl "LOW "(%2), %%ebx \n\t"
393//eax:state ebx:low, edx:range, esi:RangeLPS
394 "subl %%esi, %%edx \n\t"
f24a5159
MN
395 "movl %%edx, %%ecx \n\t"
396 "shll $17, %%ecx \n\t"
397 "cmpl %%ecx, %%ebx \n\t"
f7d0b683 398 " ja 1f \n\t"
1f4d5e9f
MN
399
400#if 1
401 //athlon:4067 P3:4110
f24a5159 402 "lea -0x100(%%edx), %%ecx \n\t"
1f4d5e9f
MN
403 "shr $31, %%ecx \n\t"
404 "shl %%cl, %%edx \n\t"
405 "shl %%cl, %%ebx \n\t"
406#else
407 //athlon:4057 P3:4130
f24a5159 408 "cmp $0x100, %%edx \n\t" //FIXME avoidable
f7d0b683
MN
409 "setb %%cl \n\t"
410 "shl %%cl, %%edx \n\t"
411 "shl %%cl, %%ebx \n\t"
1f4d5e9f 412#endif
4041a495 413 "movzbl "MANGLE(ff_h264_mps_state)"(%0), %%ecx \n\t"
f7d0b683
MN
414 "movb %%cl, (%1) \n\t"
415//eax:state ebx:low, edx:range, esi:RangeLPS
416 "test %%bx, %%bx \n\t"
417 " jnz 2f \n\t"
755073fe 418 "mov "BYTE "(%2), %%"REG_S" \n\t"
f7d0b683 419 "subl $0xFFFF, %%ebx \n\t"
755073fe 420 "movzwl (%%"REG_S"), %%ecx \n\t"
f7d0b683
MN
421 "bswap %%ecx \n\t"
422 "shrl $15, %%ecx \n\t"
755073fe 423 "add $2, %%"REG_S" \n\t"
f7d0b683 424 "addl %%ecx, %%ebx \n\t"
755073fe 425 "mov %%"REG_S", "BYTE "(%2) \n\t"
f7d0b683
MN
426 "jmp 2f \n\t"
427 "1: \n\t"
428//eax:state ebx:low, edx:range, esi:RangeLPS
f24a5159 429 "subl %%ecx, %%ebx \n\t"
f7d0b683 430 "movl %%esi, %%edx \n\t"
a6672acf 431 "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t"
f7d0b683
MN
432 "shll %%cl, %%ebx \n\t"
433 "shll %%cl, %%edx \n\t"
4041a495 434 "movzbl "MANGLE(ff_h264_lps_state)"(%0), %%ecx \n\t"
f24a5159 435 "movb %%cl, (%1) \n\t"
755073fe 436 "add $1, %0 \n\t"
f7d0b683
MN
437 "test %%bx, %%bx \n\t"
438 " jnz 2f \n\t"
439
755073fe
RD
440 "mov "BYTE "(%2), %%"REG_c" \n\t"
441 "movzwl (%%"REG_c"), %%esi \n\t"
f7d0b683
MN
442 "bswap %%esi \n\t"
443 "shrl $15, %%esi \n\t"
444 "subl $0xFFFF, %%esi \n\t"
755073fe
RD
445 "add $2, %%"REG_c" \n\t"
446 "mov %%"REG_c", "BYTE "(%2) \n\t"
f7d0b683
MN
447
448 "leal -1(%%ebx), %%ecx \n\t"
449 "xorl %%ebx, %%ecx \n\t"
f24a5159 450 "shrl $15, %%ecx \n\t"
a6672acf 451 "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"
d17faef0
MN
452 "neg %%ecx \n\t"
453 "add $7, %%ecx \n\t"
f7d0b683
MN
454
455 "shll %%cl , %%esi \n\t"
456 "addl %%esi, %%ebx \n\t"
457 "2: \n\t"
458 "movl %%edx, "RANGE "(%2) \n\t"
459 "movl %%ebx, "LOW "(%2) \n\t"
f7d0b683
MN
460 :"=&a"(bit) //FIXME this is fragile gcc either runs out of registers or misscompiles it (for example if "+a"(bit) or "+m"(*state) is used
461 :"r"(state), "r"(c)
755073fe 462 : "%"REG_c, "%ebx", "%edx", "%"REG_S, "memory"
f7d0b683 463 );
9ed92c65 464 bit&=1;
a0490b32 465#else /* BRANCHLESS_CABAC_DECODER */
13404b2e
MN
466
467
94e4c3a3 468#if defined CMOV_IS_FAST
13404b2e
MN
469#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
470 "mov "tmp" , %%ecx \n\t"\
471 "shl $17 , "tmp" \n\t"\
472 "cmp "low" , "tmp" \n\t"\
473 "cmova %%ecx , "range" \n\t"\
474 "sbb %%ecx , %%ecx \n\t"\
475 "and %%ecx , "tmp" \n\t"\
476 "sub "tmp" , "low" \n\t"\
477 "xor %%ecx , "ret" \n\t"
a0490b32 478#else /* CMOV_IS_FAST */
13404b2e
MN
479#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
480 "mov "tmp" , %%ecx \n\t"\
481 "shl $17 , "tmp" \n\t"\
482 "sub "low" , "tmp" \n\t"\
483 "sar $31 , "tmp" \n\t" /*lps_mask*/\
484 "sub %%ecx , "range" \n\t" /*RangeLPS - range*/\
485 "and "tmp" , "range" \n\t" /*(RangeLPS - range)&lps_mask*/\
486 "add %%ecx , "range" \n\t" /*new range*/\
487 "shl $17 , %%ecx \n\t"\
488 "and "tmp" , %%ecx \n\t"\
489 "sub %%ecx , "low" \n\t"\
490 "xor "tmp" , "ret" \n\t"
a0490b32 491#endif /* CMOV_IS_FAST */
ef0090a9 492
ef0090a9 493
13404b2e
MN
494#define BRANCHLESS_GET_CABAC(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
495 "movzbl "statep" , "ret" \n\t"\
496 "mov "range" , "tmp" \n\t"\
497 "and $0xC0 , "range" \n\t"\
498 "movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\
499 "sub "range" , "tmp" \n\t"\
500 BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
501 "movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx \n\t"\
502 "shl %%cl , "range" \n\t"\
503 "movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp" \n\t"\
504 "mov "tmpbyte" , "statep" \n\t"\
505 "shl %%cl , "low" \n\t"\
506 "test "lowword" , "lowword" \n\t"\
507 " jnz 1f \n\t"\
755073fe
RD
508 "mov "BYTE"("cabac"), %%"REG_c" \n\t"\
509 "movzwl (%%"REG_c") , "tmp" \n\t"\
13404b2e
MN
510 "bswap "tmp" \n\t"\
511 "shr $15 , "tmp" \n\t"\
512 "sub $0xFFFF , "tmp" \n\t"\
755073fe
RD
513 "add $2 , %%"REG_c" \n\t"\
514 "mov %%"REG_c" , "BYTE "("cabac") \n\t"\
13404b2e
MN
515 "lea -1("low") , %%ecx \n\t"\
516 "xor "low" , %%ecx \n\t"\
517 "shr $15 , %%ecx \n\t"\
518 "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"\
519 "neg %%ecx \n\t"\
520 "add $7 , %%ecx \n\t"\
521 "shl %%cl , "tmp" \n\t"\
522 "add "tmp" , "low" \n\t"\
523 "1: \n\t"
ef0090a9 524
13404b2e
MN
525 asm volatile(
526 "movl "RANGE "(%2), %%esi \n\t"
527 "movl "LOW "(%2), %%ebx \n\t"
528 BRANCHLESS_GET_CABAC("%0", "%2", "(%1)", "%%ebx", "%%bx", "%%esi", "%%edx", "%%dl")
f24a5159 529 "movl %%esi, "RANGE "(%2) \n\t"
b99f3cab 530 "movl %%ebx, "LOW "(%2) \n\t"
ef0090a9 531
ef0090a9
MN
532 :"=&a"(bit)
533 :"r"(state), "r"(c)
755073fe 534 : "%"REG_c, "%ebx", "%edx", "%esi", "memory"
ef0090a9 535 );
f1b37db4 536 bit&=1;
a0490b32 537#endif /* BRANCHLESS_CABAC_DECODER */
419b8784 538#else /* defined(ARCH_X86_32) && !(defined(PIC) && defined(__GNUC__)) */
bfe328ca 539 int s = *state;
a0f2c6ba 540 int RangeLPS= ff_h264_lps_range[2*(c->range&0xC0) + s];
88730be6 541 int bit, lps_mask attribute_unused;
115329f1 542
d592f67f 543 c->range -= RangeLPS;
0bc2e7f0 544#ifndef BRANCHLESS_CABAC_DECODER
f24a5159 545 if(c->low < (c->range<<17)){
bfe328ca 546 bit= s&1;
d61c4e73 547 *state= ff_h264_mps_state[s];
ec7eb896 548 renorm_cabac_decoder_once(c);
d592f67f 549 }else{
f24a5159
MN
550 bit= ff_h264_norm_shift[RangeLPS];
551 c->low -= (c->range<<17);
d61c4e73 552 *state= ff_h264_lps_state[s];
260ceb63
MN
553 c->range = RangeLPS<<bit;
554 c->low <<= bit;
555 bit= (s&1)^1;
556
ec7eb896
MN
557 if(!(c->low & 0xFFFF)){
558 refill2(c);
260ceb63 559 }
d592f67f 560 }
a0490b32 561#else /* BRANCHLESS_CABAC_DECODER */
f24a5159 562 lps_mask= ((c->range<<17) - c->low)>>31;
115329f1 563
f24a5159 564 c->low -= (c->range<<17) & lps_mask;
ec7eb896 565 c->range += (RangeLPS - c->range) & lps_mask;
115329f1 566
2e1aee80 567 s^=lps_mask;
68a205ed 568 *state= (ff_h264_mlps_state+128)[s];
2e1aee80 569 bit= s&1;
115329f1 570
f24a5159 571 lps_mask= ff_h264_norm_shift[c->range];
ec7eb896
MN
572 c->range<<= lps_mask;
573 c->low <<= lps_mask;
574 if(!(c->low & CABAC_MASK))
575 refill2(c);
a0490b32 576#endif /* BRANCHLESS_CABAC_DECODER */
419b8784 577#endif /* defined(ARCH_X86_32) && !(defined(PIC) && defined(__GNUC__)) */
115329f1 578 return bit;
d592f67f
MN
579}
580
851ded89
MN
581static int __attribute((noinline)) get_cabac_noinline(CABACContext *c, uint8_t * const state){
582 return get_cabac_inline(c,state);
583}
584
585static int get_cabac(CABACContext *c, uint8_t * const state){
586 return get_cabac_inline(c,state);
587}
588
938dd846 589static int get_cabac_bypass(CABACContext *c){
ebd624b6
MN
590#if 0 //not faster
591 int bit;
592 asm volatile(
593 "movl "RANGE "(%1), %%ebx \n\t"
594 "movl "LOW "(%1), %%eax \n\t"
595 "shl $17, %%ebx \n\t"
596 "add %%eax, %%eax \n\t"
597 "sub %%ebx, %%eax \n\t"
598 "cdq \n\t"
599 "and %%edx, %%ebx \n\t"
600 "add %%ebx, %%eax \n\t"
601 "test %%ax, %%ax \n\t"
602 " jnz 1f \n\t"
755073fe 603 "movl "BYTE "(%1), %%"REG_b" \n\t"
ebd624b6 604 "subl $0xFFFF, %%eax \n\t"
755073fe 605 "movzwl (%%"REG_b"), %%ecx \n\t"
ebd624b6
MN
606 "bswap %%ecx \n\t"
607 "shrl $15, %%ecx \n\t"
755073fe 608 "addl $2, %%"REG_b" \n\t"
ebd624b6 609 "addl %%ecx, %%eax \n\t"
755073fe 610 "movl %%"REG_b", "BYTE "(%1) \n\t"
ebd624b6
MN
611 "1: \n\t"
612 "movl %%eax, "LOW "(%1) \n\t"
613
614 :"=&d"(bit)
615 :"r"(c)
755073fe 616 : "%eax", "%"REG_b, "%ecx", "memory"
ebd624b6
MN
617 );
618 return bit+1;
619#else
f24a5159 620 int range;
d592f67f
MN
621 c->low += c->low;
622
ec7eb896
MN
623 if(!(c->low & CABAC_MASK))
624 refill(c);
115329f1 625
f24a5159
MN
626 range= c->range<<17;
627 if(c->low < range){
d592f67f
MN
628 return 0;
629 }else{
f24a5159 630 c->low -= range;
d592f67f
MN
631 return 1;
632 }
ebd624b6
MN
633#endif
634}
635
636
637static always_inline int get_cabac_bypass_sign(CABACContext *c, int val){
638#ifdef ARCH_X86
ebd624b6
MN
639 asm volatile(
640 "movl "RANGE "(%1), %%ebx \n\t"
641 "movl "LOW "(%1), %%eax \n\t"
642 "shl $17, %%ebx \n\t"
643 "add %%eax, %%eax \n\t"
644 "sub %%ebx, %%eax \n\t"
645 "cdq \n\t"
646 "and %%edx, %%ebx \n\t"
647 "add %%ebx, %%eax \n\t"
648 "xor %%edx, %%ecx \n\t"
649 "sub %%edx, %%ecx \n\t"
650 "test %%ax, %%ax \n\t"
651 " jnz 1f \n\t"
755073fe 652 "mov "BYTE "(%1), %%"REG_b" \n\t"
ebd624b6 653 "subl $0xFFFF, %%eax \n\t"
755073fe 654 "movzwl (%%"REG_b"), %%edx \n\t"
ebd624b6
MN
655 "bswap %%edx \n\t"
656 "shrl $15, %%edx \n\t"
755073fe 657 "add $2, %%"REG_b" \n\t"
ebd624b6 658 "addl %%edx, %%eax \n\t"
755073fe 659 "mov %%"REG_b", "BYTE "(%1) \n\t"
ebd624b6
MN
660 "1: \n\t"
661 "movl %%eax, "LOW "(%1) \n\t"
662
663 :"+c"(val)
664 :"r"(c)
755073fe 665 : "%eax", "%"REG_b, "%edx", "memory"
ebd624b6
MN
666 );
667 return val;
668#else
669 int range, mask;
670 c->low += c->low;
671
672 if(!(c->low & CABAC_MASK))
673 refill(c);
674
675 range= c->range<<17;
676 c->low -= range;
677 mask= c->low >> 31;
678 range &= mask;
679 c->low += range;
680 return (val^mask)-mask;
681#endif
d592f67f 682}
ebd624b6 683
eb73bf72
MN
//FIXME the x86 code from this file should be moved into i386/h264 or cabac something.c/h (note: I'll kill you if you move my code away from under my fingers before I am finished with it!)
//FIXME use some macros to avoid duplicating get_cabac (can't be done yet as that would make optimization work hard)
419b8784 686#if defined(ARCH_X86_32) && !(defined(PIC) && defined(__GNUC__))
eb73bf72
MN
687static int decode_significance_x86(CABACContext *c, int max_coeff, uint8_t *significant_coeff_ctx_base, int *index){
688 void *end= significant_coeff_ctx_base + max_coeff - 1;
689 int minusstart= -(int)significant_coeff_ctx_base;
849a5004 690 int minusindex= 4-(int)index;
eb73bf72
MN
691 int coeff_count;
692 asm volatile(
693 "movl "RANGE "(%3), %%esi \n\t"
694 "movl "LOW "(%3), %%ebx \n\t"
695
696 "2: \n\t"
697
a616db28 698 BRANCHLESS_GET_CABAC("%%edx", "%3", "(%1)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
eb73bf72 699
a616db28 700 "test $1, %%edx \n\t"
eb73bf72
MN
701 " jz 3f \n\t"
702
a616db28
MN
703 BRANCHLESS_GET_CABAC("%%edx", "%3", "61(%1)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
704
755073fe 705 "mov %2, %%"REG_a" \n\t"
eb73bf72 706 "movl %4, %%ecx \n\t"
755073fe
RD
707 "add %1, %%"REG_c" \n\t"
708 "movl %%ecx, (%%"REG_a") \n\t"
eb73bf72 709
a616db28 710 "test $1, %%edx \n\t"
eb73bf72
MN
711 " jnz 4f \n\t"
712
755073fe
RD
713 "add $4, %%"REG_a" \n\t"
714 "mov %%"REG_a", %2 \n\t"
d3e7c5c3 715
eb73bf72 716 "3: \n\t"
755073fe
RD
717 "add $1, %1 \n\t"
718 "cmp %5, %1 \n\t"
eb73bf72 719 " jb 2b \n\t"
755073fe 720 "mov %2, %%"REG_a" \n\t"
eb73bf72 721 "movl %4, %%ecx \n\t"
755073fe
RD
722 "add %1, %%"REG_c" \n\t"
723 "movl %%ecx, (%%"REG_a") \n\t"
eb73bf72 724 "4: \n\t"
755073fe 725 "add %6, %%eax \n\t"
eb73bf72
MN
726 "shr $2, %%eax \n\t"
727
728 "movl %%esi, "RANGE "(%3) \n\t"
729 "movl %%ebx, "LOW "(%3) \n\t"
730 :"=&a"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index)\
731 :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex)\
755073fe 732 : "%"REG_c, "%ebx", "%edx", "%esi", "memory"\
eb73bf72
MN
733 );
734 return coeff_count;
735}
e08f5806
MN
736
737static int decode_significance_8x8_x86(CABACContext *c, uint8_t *significant_coeff_ctx_base, int *index, uint8_t *sig_off){
738 int minusindex= 4-(int)index;
739 int coeff_count;
755073fe 740 long last=0;
e08f5806
MN
741 asm volatile(
742 "movl "RANGE "(%3), %%esi \n\t"
743 "movl "LOW "(%3), %%ebx \n\t"
744
755073fe 745 "mov %1, %%"REG_D" \n\t"
e08f5806
MN
746 "2: \n\t"
747
755073fe
RD
748 "mov %6, %%"REG_a" \n\t"
749 "movzbl (%%"REG_a", %%"REG_D"), %%edi \n\t"
750 "add %5, %%"REG_D" \n\t"
e08f5806 751
755073fe 752 BRANCHLESS_GET_CABAC("%%edx", "%3", "(%%"REG_D")", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
e08f5806
MN
753
754 "mov %1, %%edi \n\t"
755 "test $1, %%edx \n\t"
756 " jz 3f \n\t"
757
758 "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%%edi), %%edi\n\t"
755073fe 759 "add %5, %%"REG_D" \n\t"
e08f5806 760
755073fe 761 BRANCHLESS_GET_CABAC("%%edx", "%3", "15(%%"REG_D")", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
e08f5806 762
755073fe 763 "mov %2, %%"REG_a" \n\t"
e08f5806 764 "mov %1, %%edi \n\t"
755073fe 765 "movl %%edi, (%%"REG_a") \n\t"
e08f5806
MN
766
767 "test $1, %%edx \n\t"
768 " jnz 4f \n\t"
769
755073fe
RD
770 "add $4, %%"REG_a" \n\t"
771 "mov %%"REG_a", %2 \n\t"
e08f5806
MN
772
773 "3: \n\t"
774 "addl $1, %%edi \n\t"
775 "mov %%edi, %1 \n\t"
776 "cmpl $63, %%edi \n\t"
777 " jb 2b \n\t"
755073fe
RD
778 "mov %2, %%"REG_a" \n\t"
779 "movl %%edi, (%%"REG_a") \n\t"
e08f5806
MN
780 "4: \n\t"
781 "addl %4, %%eax \n\t"
782 "shr $2, %%eax \n\t"
783
784 "movl %%esi, "RANGE "(%3) \n\t"
785 "movl %%ebx, "LOW "(%3) \n\t"
786 :"=&a"(coeff_count),"+m"(last), "+m"(index)\
787 :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off)\
755073fe 788 : "%"REG_c, "%ebx", "%edx", "%esi", "%"REG_D, "memory"\
e08f5806
MN
789 );
790 return coeff_count;
791}
419b8784 792#endif /* defined(ARCH_X86_32) && !(defined(PIC) && defined(__GNUC__)) */
d592f67f 793
5e20f836
MN
794/**
795 *
796 * @return the number of bytes read or 0 if no end
797 */
938dd846 798static int get_cabac_terminate(CABACContext *c){
f24a5159
MN
799 c->range -= 2;
800 if(c->low < c->range<<17){
ec7eb896 801 renorm_cabac_decoder_once(c);
d592f67f
MN
802 return 0;
803 }else{
5e20f836 804 return c->bytestream - c->bytestream_start;
115329f1 805 }
d592f67f
MN
806}
807
61ccfcc0
MN
808/**
809 * get (truncated) unnary binarization.
810 */
938dd846 811static int get_cabac_u(CABACContext *c, uint8_t * state, int max, int max_index, int truncated){
61ccfcc0 812 int i;
115329f1
DB
813
814 for(i=0; i<max; i++){
61ccfcc0
MN
815 if(get_cabac(c, state)==0)
816 return i;
115329f1 817
61ccfcc0
MN
818 if(i< max_index) state++;
819 }
820
821 return truncated ? max : -1;
822}
823
824/**
825 * get unary exp golomb k-th order binarization.
826 */
938dd846 827static int get_cabac_ueg(CABACContext *c, uint8_t * state, int max, int is_signed, int k, int max_index){
61ccfcc0
MN
828 int i, v;
829 int m= 1<<k;
115329f1
DB
830
831 if(get_cabac(c, state)==0)
61ccfcc0 832 return 0;
115329f1 833
61ccfcc0 834 if(0 < max_index) state++;
115329f1
DB
835
836 for(i=1; i<max; i++){
61ccfcc0
MN
837 if(get_cabac(c, state)==0){
838 if(is_signed && get_cabac_bypass(c)){
839 return -i;
840 }else
841 return i;
842 }
843
844 if(i < max_index) state++;
845 }
115329f1 846
61ccfcc0
MN
847 while(get_cabac_bypass(c)){
848 i+= m;
849 m+= m;
850 }
115329f1 851
61ccfcc0
MN
852 v=0;
853 while(m>>=1){
854 v+= v + get_cabac_bypass(c);
855 }
856 i += v;
857
858 if(is_signed && get_cabac_bypass(c)){
859 return -i;
860 }else
861 return i;
862}