x86: h264: remove hardcoded edx in decode_significance[_8x8]_x86()
[libav.git] / libavcodec / x86 / h264_i386.h
CommitLineData
52cb7981
JD
1/*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4 *
2912e87a 5 * This file is part of Libav.
52cb7981 6 *
2912e87a 7 * Libav is free software; you can redistribute it and/or
52cb7981
JD
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
2912e87a 12 * Libav is distributed in the hope that it will be useful,
52cb7981
JD
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
2912e87a 18 * License along with Libav; if not, write to the Free Software
52cb7981
JD
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22/**
ba87f080 23 * @file
52cb7981
JD
24 * H.264 / AVC / MPEG4 part10 codec.
25 * non-MMX i386-specific optimizations for H.264
26 * @author Michael Niedermayer <michaelni@gmx.at>
27 */
28
a6493a8f
DB
29#ifndef AVCODEC_X86_H264_I386_H
30#define AVCODEC_X86_H264_I386_H
52cb7981 31
6b712acc
MR
32#include <stddef.h>
33
245976da 34#include "libavcodec/cabac.h"
52cb7981 35
591d87ba
JD
36//FIXME use some macros to avoid duplicating get_cabac (cannot be done yet
37//as that would make optimization work hard)
018c3383 38#if ARCH_X86 && HAVE_7REGS && !defined(BROKEN_RELOCATIONS)
591d87ba
JD
/**
 * Scan a run of CABAC context bytes with hand-written x86 inline assembly,
 * store coefficient positions through index and return how many were stored.
 *
 * The loop body always runs at least once. On each pass:
 *  - one bin is decoded with the context byte at the current context pointer
 *    (which starts at significant_coeff_ctx_base);
 *  - if the low bit of that bin is set, a second bin is decoded with the
 *    context byte last_off bytes further on, and the current position
 *    (context pointer minus the original base) is stored through index;
 *  - if the low bit of the second bin is set the scan stops, otherwise index
 *    advances by one int;
 *  - the context pointer advances by one byte and the loop repeats while it
 *    is below significant_coeff_ctx_base + max_coeff - 1 (unsigned compare).
 * If the loop runs to completion, that final position is stored as well.
 *
 * BRANCHLESS_GET_CABAC is defined outside this header (presumably in
 * libavcodec/cabac.h - TODO confirm); its exact effect on the context byte
 * and on the range/low registers is not visible here.
 *
 * NOTE(review): the pointer arguments are cast to int for minusstart and
 * minusindex. They are only consumed by 32-bit stores/arithmetic below, which
 * looks sufficient for small pointer differences - confirm for targets where
 * pointers are wider than int.
 *
 * @param c                           CABAC context; its range and low fields
 *                                    are loaded into registers on entry and
 *                                    written back on exit
 * @param max_coeff                   bounds the scan, see above
 * @param significant_coeff_ctx_base  first context byte used for the first bin
 * @param index                       output array receiving positions
 * @param last_off                    byte offset from a first-bin context byte
 *                                    to the matching second-bin context byte
 * @return number of positions written to index
 */
39static int decode_significance_x86(CABACContext *c, int max_coeff,
40 uint8_t *significant_coeff_ctx_base,
c90b9442 41 int *index, x86_reg last_off){
52cb7981
JD
/* Loop bound for the context pointer: base + max_coeff - 1. */
42 void *end= significant_coeff_ctx_base + max_coeff - 1;
/* Negated base, so that minusstart + context pointer yields the position. */
43 int minusstart= -(int)significant_coeff_ctx_base;
/* 4 - original index address; added to the final index address and shifted
 * right by 2 at label 4 to obtain the number of ints written. */
44 int minusindex= 4-(int)index;
3fc4e36c 45 int bit;
e4b5a204 46 x86_reg coeff_count;
018c3383 47 int low;
/*
 * Operand map: %0 coeff_count, %1 context pointer, %2 index, %3 low, %4 bit,
 * %5 c, %6 minusstart, %7 end, %8 minusindex, %9 last_off,
 * %10/%11/%12 offsets of range/low/bytestream inside CABACContext.
 * %esi carries c->range for the whole statement.
 */
be449fca 48 __asm__ volatile(
3fc4e36c
MR
/* Load c->range into %esi and c->low into %3. */
49 "movl %a10(%5), %%esi \n\t"
50 "movl %a11(%5), %3 \n\t"
52cb7981
JD
51
52 "2: \n\t"
53
3fc4e36c
MR
/* First bin, context byte at (%1). %0 is handed to the macro as %k0/%b0,
 * presumably as scratch, which is why the index pointer is reloaded from
 * memory (%2) every time it is needed afterwards. */
54 BRANCHLESS_GET_CABAC("%4", "%5", "(%1)", "%3",
55 "%w3", "%%esi", "%k0", "%b0", "%a12")
52cb7981 56
/* Low bit of the first bin clear: skip straight to the loop increment. */
3fc4e36c 57 "test $1, %4 \n\t"
52cb7981 58 " jz 3f \n\t"
/* Temporarily move the context pointer by last_off for the second bin. */
3fc4e36c 59 "add %9, %1 \n\t"
52cb7981 60
3fc4e36c
MR
61 BRANCHLESS_GET_CABAC("%4", "%5", "(%1)", "%3",
62 "%w3", "%%esi", "%k0", "%b0", "%a12")
52cb7981 63
/* Undo the last_off adjustment, then store position = %1 + minusstart
 * through the current index pointer. */
3fc4e36c 64 "sub %9, %1 \n\t"
e4b5a204 65 "mov %2, %0 \n\t"
3fc4e36c 66 "movl %6, %%ecx \n\t"
52cb7981 67 "add %1, %%"REG_c" \n\t"
e4b5a204 68 "movl %%ecx, (%0) \n\t"
52cb7981 69
/* Low bit of the second bin set: stop, with %0 still at the entry just written. */
3fc4e36c 70 "test $1, %4 \n\t"
52cb7981
JD
71 " jnz 4f \n\t"
72
e4b5a204
MR
/* Otherwise advance index by one int and write it back to memory. */
73 "add $4, %0 \n\t"
74 "mov %0, %2 \n\t"
52cb7981
JD
75
76 "3: \n\t"
/* Next context byte; loop while the pointer is below end (unsigned). */
77 "add $1, %1 \n\t"
3fc4e36c 78 "cmp %7, %1 \n\t"
52cb7981 79 " jb 2b \n\t"
/* Loop ran to completion: store the final position through index. */
e4b5a204 80 "mov %2, %0 \n\t"
3fc4e36c 81 "movl %6, %%ecx \n\t"
52cb7981 82 "add %1, %%"REG_c" \n\t"
e4b5a204 83 "movl %%ecx, (%0) \n\t"
52cb7981 84 "4: \n\t"
/* coeff_count = (index address + 4 - original index address) >> 2. */
3fc4e36c 85 "add %8, %k0 \n\t"
e4b5a204 86 "shr $2, %k0 \n\t"
52cb7981 87
3fc4e36c
MR
/* Write the register copies of range and low back into the context. */
88 "movl %%esi, %a10(%5) \n\t"
89 "movl %3, %a11(%5) \n\t"
e4b5a204 90 :"=&r"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index),
3fc4e36c 91 "=&r"(low), "=&r"(bit)
6b712acc
MR
92 :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off),
93 "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)),
94 "i"(offsetof(CABACContext, bytestream))
3fc4e36c 95 : "%"REG_c, "%esi", "memory"
52cb7981
JD
96 );
97 return coeff_count;
98}
99
591d87ba
JD
/**
 * Table-driven variant of the scan above: context bytes are selected through
 * lookup tables indexed by the position instead of by walking a pointer.
 *
 * A position counter (the local `last`, starting at 0) drives the loop. On
 * each pass:
 *  - one bin is decoded with the context byte at
 *    significant_coeff_ctx_base + sig_off[position];
 *  - if the low bit of that bin is set, a second bin is decoded with the
 *    context byte at significant_coeff_ctx_base + last_off +
 *    last_coeff_flag_offset_8x8[position], and the position is stored
 *    through index;
 *  - if the low bit of the second bin is set the scan stops, otherwise index
 *    advances by one int;
 *  - the position is incremented and the loop repeats while it is below 63.
 * If the loop runs to completion, the final position (63) is stored as well.
 *
 * last_coeff_flag_offset_8x8 is not declared in this header; it is addressed
 * by symbol through MANGLE(), so it must be defined where this header is
 * included. Presumably this symbol addressing is why BROKEN_RELOCATIONS
 * builds are excluded by the surrounding #if - TODO confirm.
 *
 * BRANCHLESS_GET_CABAC is defined outside this header; its exact effect on
 * the context byte and on the range/low registers is not visible here.
 *
 * @param c                           CABAC context; its range and low fields
 *                                    are loaded into registers on entry and
 *                                    written back on exit
 * @param significant_coeff_ctx_base  base address of the context bytes
 * @param index                       output array receiving positions
 * @param last_off                    extra byte offset applied for the
 *                                    second bin
 * @param sig_off                     byte table mapping a position to the
 *                                    context offset used for the first bin
 * @return number of positions written to index
 */
100static int decode_significance_8x8_x86(CABACContext *c,
101 uint8_t *significant_coeff_ctx_base,
c90b9442 102 int *index, x86_reg last_off, const uint8_t *sig_off){
/* 4 - original index address; turned into an entry count at label 4. */
52cb7981 103 int minusindex= 4-(int)index;
3fc4e36c 104 int bit;
e4b5a204 105 x86_reg coeff_count;
018c3383 106 int low;
/* Despite its name this is the running position counter, kept in memory
 * because REG_D is reused for address computation inside the loop. */
40d0e665 107 x86_reg last=0;
/*
 * Operand map: %0 coeff_count, %1 last, %2 index, %3 low, %4 bit, %5 c,
 * %6 minusindex, %7 significant_coeff_ctx_base, %8 sig_off, %9 last_off,
 * %10/%11/%12 offsets of range/low/bytestream inside CABACContext.
 * %esi carries c->range for the whole statement.
 */
be449fca 108 __asm__ volatile(
3fc4e36c
MR
/* Load c->range into %esi and c->low into %3. */
109 "movl %a10(%5), %%esi \n\t"
110 "movl %a11(%5), %3 \n\t"
52cb7981
JD
111
/* REG_D = position (0 on entry). */
112 "mov %1, %%"REG_D" \n\t"
113 "2: \n\t"
114
/* REG_D = significant_coeff_ctx_base + sig_off[position]. */
3fc4e36c 115 "mov %8, %0 \n\t"
e4b5a204 116 "movzbl (%0, %%"REG_D"), %%edi \n\t"
3fc4e36c 117 "add %7, %%"REG_D" \n\t"
52cb7981 118
3fc4e36c
MR
119 BRANCHLESS_GET_CABAC("%4", "%5", "(%%"REG_D")", "%3",
120 "%w3", "%%esi", "%k0", "%b0", "%a12")
52cb7981
JD
121
/* Reload the position; low bit of the first bin clear means skip to the
 * loop increment. */
122 "mov %1, %%edi \n\t"
3fc4e36c 123 "test $1, %4 \n\t"
52cb7981
JD
124 " jz 3f \n\t"
125
/* REG_D = significant_coeff_ctx_base + last_off
 *         + last_coeff_flag_offset_8x8[position]. */
126 "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%%edi), %%edi\n\t"
3fc4e36c
MR
127 "add %7, %%"REG_D" \n\t"
128 "add %9, %%"REG_D" \n\t"
52cb7981 129
3fc4e36c
MR
130 BRANCHLESS_GET_CABAC("%4", "%5", "(%%"REG_D")", "%3",
131 "%w3", "%%esi", "%k0", "%b0", "%a12")
52cb7981 132
/* Store the position through the current index pointer. */
e4b5a204 133 "mov %2, %0 \n\t"
52cb7981 134 "mov %1, %%edi \n\t"
e4b5a204 135 "movl %%edi, (%0) \n\t"
52cb7981 136
/* Low bit of the second bin set: stop, with %0 still at the entry just written. */
3fc4e36c 137 "test $1, %4 \n\t"
52cb7981
JD
138 " jnz 4f \n\t"
139
e4b5a204
MR
/* Otherwise advance index by one int and write it back to memory. */
140 "add $4, %0 \n\t"
141 "mov %0, %2 \n\t"
52cb7981
JD
142
143 "3: \n\t"
/* position++, saved back to memory; loop while position is below 63. */
144 "addl $1, %%edi \n\t"
145 "mov %%edi, %1 \n\t"
146 "cmpl $63, %%edi \n\t"
147 " jb 2b \n\t"
e4b5a204
MR
/* Loop ran to completion: store the final position through index. */
148 "mov %2, %0 \n\t"
149 "movl %%edi, (%0) \n\t"
52cb7981 150 "4: \n\t"
/* coeff_count = (index address + 4 - original index address) >> 2. */
3fc4e36c 151 "addl %6, %k0 \n\t"
e4b5a204 152 "shr $2, %k0 \n\t"
52cb7981 153
3fc4e36c
MR
/* Write the register copies of range and low back into the context. */
154 "movl %%esi, %a10(%5) \n\t"
155 "movl %3, %a11(%5) \n\t"
156 :"=&r"(coeff_count),"+m"(last), "+m"(index), "=&r"(low), "=&r"(bit)
6b712acc
MR
157 :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off), "m"(last_off),
158 "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)),
159 "i"(offsetof(CABACContext, bytestream))
3fc4e36c 160 : "%"REG_c, "%esi", "%"REG_D, "memory"
52cb7981
JD
161 );
162 return coeff_count;
163}
018c3383 164#endif /* ARCH_X86 && HAVE_7REGS && !defined(BROKEN_RELOCATIONS) */
52cb7981 165
a6493a8f 166#endif /* AVCODEC_X86_H264_I386_H */