Commit | Line | Data |
---|---|---|
52cb7981 JD |
1 | /* |
2 | * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder | |
3 | * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> | |
4 | * | |
2912e87a | 5 | * This file is part of Libav. |
52cb7981 | 6 | * |
2912e87a | 7 | * Libav is free software; you can redistribute it and/or |
52cb7981 JD |
8 | * modify it under the terms of the GNU Lesser General Public |
9 | * License as published by the Free Software Foundation; either | |
10 | * version 2.1 of the License, or (at your option) any later version. | |
11 | * | |
2912e87a | 12 | * Libav is distributed in the hope that it will be useful, |
52cb7981 JD |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Lesser General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Lesser General Public | |
2912e87a | 18 | * License along with Libav; if not, write to the Free Software |
52cb7981 JD |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | */ | |
21 | ||
22 | /** | |
ba87f080 | 23 | * @file |
52cb7981 JD |
24 | * H.264 / AVC / MPEG4 part10 codec. |
25 | * non-MMX i386-specific optimizations for H.264 | |
26 | * @author Michael Niedermayer <michaelni@gmx.at> | |
27 | */ | |
28 | ||
a6493a8f DB |
29 | #ifndef AVCODEC_X86_H264_I386_H |
30 | #define AVCODEC_X86_H264_I386_H | |
52cb7981 | 31 | |
6b712acc MR |
32 | #include <stddef.h> |
33 | ||
245976da | 34 | #include "libavcodec/cabac.h" |
52cb7981 | 35 | |
591d87ba JD |
36 | //FIXME use some macros to avoid duplicating get_cabac (cannot be done yet |
37 | //as that would make optimization work hard) | |
018c3383 | 38 | #if ARCH_X86 && HAVE_7REGS && !defined(BROKEN_RELOCATIONS) |
/**
 * Scan the significance map of a coefficient block with inline x86 assembly.
 *
 * Starting at significant_coeff_ctx_base, one bin is decoded per context byte
 * via BRANCHLESS_GET_CABAC (macro defined outside this file; presumably it
 * leaves the decoded bin in bit 0 of edx - confirm against cabac.h).
 * When bit 0 is set, a second bin is decoded from the context byte located
 * last_off bytes further on, and the current position (context pointer minus
 * the original significant_coeff_ctx_base) is stored through index.
 * If bit 0 of that second bin is set the scan stops; otherwise the index
 * pointer advances by one int and the scan continues until the context
 * pointer reaches significant_coeff_ctx_base + max_coeff - 1, at which point
 * that final position is stored unconditionally.
 *
 * @param c                          CABAC context; its range and low fields
 *                                   are loaded at entry and written back at
 *                                   exit
 * @param max_coeff                  used only to compute the end pointer
 *                                   (base + max_coeff - 1)
 * @param significant_coeff_ctx_base first context byte to decode with
 * @param index                      output array receiving the stored
 *                                   positions
 * @param last_off                   byte offset added to the context pointer
 *                                   for the second decode
 * @return number of ints written to index, computed as
 *         (address of last written entry + 4 - index) >> 2
 *
 * NOTE(review): the (int) casts below keep only the low bits of the pointers;
 * the asm only uses them in 32-bit add/store arithmetic.
 */
591d87ba JD |
39 | static int decode_significance_x86(CABACContext *c, int max_coeff,
40 | uint8_t *significant_coeff_ctx_base, | |
c90b9442 | 41 | int *index, x86_reg last_off){ |
52cb7981 JD |
42 | void *end= significant_coeff_ctx_base + max_coeff - 1;
43 | int minusstart= -(int)significant_coeff_ctx_base; | |
44 | int minusindex= 4-(int)index; | |
45 | int coeff_count; | |
018c3383 | 46 | int low; |
/* Operand map: %0=coeff_count (eax), %1=significant_coeff_ctx_base (running
 * context pointer), %2=index, %3=low, %4=c, %5=minusstart, %6=end,
 * %7=minusindex, %8=last_off, %9/%10/%11=offsets of range/low/bytestream
 * inside CABACContext. esi holds c->range for the duration of the asm. */
be449fca | 47 | __asm__ volatile(
018c3383 MR |
48 | "movl %a9(%4), %%esi \n\t"
49 | "movl %a10(%4), %3 \n\t" | |
/* 2: loop head - decode one bin using the context byte at (%1). */
52cb7981 JD |
50 |
51 | "2: \n\t" | |
52 | ||
018c3383 MR |
53 | BRANCHLESS_GET_CABAC("%%edx", "%4", "(%1)", "%3",
54 | "%w3", "%%esi", "%%eax", "%%al", "%a11") | |
52cb7981 JD |
55 |
56 | "test $1, %%edx \n\t" | |
57 | " jz 3f \n\t" | |
/* Bit 0 set: shift the context pointer by last_off for the second decode. */
018c3383 | 58 | "add %8, %1 \n\t" |
52cb7981 | 59 |
018c3383 MR |
60 | BRANCHLESS_GET_CABAC("%%edx", "%4", "(%1)", "%3",
61 | "%w3", "%%esi", "%%eax", "%%al", "%a11") | |
52cb7981 | 62 |
/* Undo the last_off shift, then store (context pointer + minusstart), i.e.
 * the current position, through the index pointer. */
018c3383 | 63 | "sub %8, %1 \n\t" |
52cb7981 | 64 | "mov %2, %%"REG_a" \n\t" |
018c3383 | 65 | "movl %5, %%ecx \n\t" |
52cb7981 JD |
66 | "add %1, %%"REG_c" \n\t"
67 | "movl %%ecx, (%%"REG_a") \n\t" | |
68 | ||
69 | "test $1, %%edx \n\t" | |
70 | " jnz 4f \n\t" | |
71 | ||
72 | "add $4, %%"REG_a" \n\t" | |
73 | "mov %%"REG_a", %2 \n\t" | |
74 | ||
75 | "3: \n\t" | |
76 | "add $1, %1 \n\t" | |
/* Keep looping while the context pointer is below end (unsigned compare). */
018c3383 | 77 | "cmp %6, %1 \n\t" |
52cb7981 JD |
78 | " jb 2b \n\t"
79 | "mov %2, %%"REG_a" \n\t" | |
018c3383 | 80 | "movl %5, %%ecx \n\t" |
52cb7981 JD |
81 | "add %1, %%"REG_c" \n\t"
82 | "movl %%ecx, (%%"REG_a") \n\t" | |
83 | "4: \n\t" | |
/* eax holds the address of the last written entry; adding minusindex
 * (4 - index) and shifting right by 2 turns it into an entry count. */
018c3383 | 84 | "add %7, %%eax \n\t" |
52cb7981 JD |
85 | "shr $2, %%eax \n\t"
86 | ||
/* Write the cached range (esi) and low (%3) back into the CABAC context. */
018c3383 MR |
87 | "movl %%esi, %a9(%4) \n\t"
88 | "movl %3, %a10(%4) \n\t" | |
89 | :"=&a"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index), | |
90 | "=&r"(low) | |
6b712acc MR |
91 | :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off),
92 | "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)), | |
93 | "i"(offsetof(CABACContext, bytestream)) | |
018c3383 | 94 | : "%"REG_c, "%edx", "%esi", "memory" |
52cb7981 JD |
95 | );
96 | return coeff_count; | |
97 | } | |
98 | ||
/**
 * Scan the significance map of an 8x8 block with inline x86 assembly.
 *
 * A position counter (the local variable last, starting at 0) runs up to 63.
 * For each position one bin is decoded via BRANCHLESS_GET_CABAC (macro
 * defined outside this file; presumably it leaves the decoded bin in bit 0
 * of edx - confirm against cabac.h) using the context byte at
 * significant_coeff_ctx_base + sig_off[position].
 * When bit 0 is set, a second bin is decoded using the context byte at
 * significant_coeff_ctx_base + last_off +
 * last_coeff_flag_offset_8x8[position] (a table defined outside this file),
 * and the position is stored through index. If bit 0 of that second bin is
 * set the scan stops; otherwise the index pointer advances by one int.
 * When the counter reaches 63 the loop ends and 63 is stored unconditionally.
 *
 * @param c                          CABAC context; its range and low fields
 *                                   are loaded at entry and written back at
 *                                   exit
 * @param significant_coeff_ctx_base base address of the context bytes
 * @param index                      output array receiving the stored
 *                                   positions
 * @param last_off                   byte offset added to the base for the
 *                                   second decode
 * @param sig_off                    per-position byte table selecting the
 *                                   context for the first decode
 * @return number of ints written to index, computed as
 *         (address of last written entry + 4 - index) >> 2
 *
 * NOTE(review): the (int) cast below keeps only the low bits of the pointer;
 * the asm only uses it in 32-bit add arithmetic.
 */
591d87ba JD |
99 | static int decode_significance_8x8_x86(CABACContext *c,
100 | uint8_t *significant_coeff_ctx_base, | |
c90b9442 | 101 | int *index, x86_reg last_off, const uint8_t *sig_off){ |
52cb7981 JD |
102 | int minusindex= 4-(int)index;
103 | int coeff_count; | |
018c3383 | 104 | int low; |
40d0e665 | 105 | x86_reg last=0; |
/* Operand map: %0=coeff_count (eax), %1=last (position counter, in memory),
 * %2=index, %3=low, %4=c, %5=minusindex, %6=significant_coeff_ctx_base,
 * %7=sig_off, %8=last_off, %9/%10/%11=offsets of range/low/bytestream
 * inside CABACContext. esi holds c->range for the duration of the asm. */
be449fca | 106 | __asm__ volatile(
018c3383 MR |
107 | "movl %a9(%4), %%esi \n\t"
108 | "movl %a10(%4), %3 \n\t" | |
52cb7981 JD |
109 |
110 | "mov %1, %%"REG_D" \n\t" | |
111 | "2: \n\t" | |
112 | ||
/* Load sig_off[position] into edi, then add the context base so that REG_D
 * addresses the context byte for the first decode. */
018c3383 | 113 | "mov %7, %%"REG_a" \n\t" |
52cb7981 | 114 | "movzbl (%%"REG_a", %%"REG_D"), %%edi \n\t" |
018c3383 | 115 | "add %6, %%"REG_D" \n\t" |
52cb7981 | 116 |
018c3383 MR |
117 | BRANCHLESS_GET_CABAC("%%edx", "%4", "(%%"REG_D")", "%3",
118 | "%w3", "%%esi", "%%eax", "%%al", "%a11") | |
/* Reload the position into edi; skip to 3 when bit 0 of edx is clear. */
52cb7981 JD |
119 |
120 | "mov %1, %%edi \n\t" | |
121 | "test $1, %%edx \n\t" | |
122 | " jz 3f \n\t" | |
123 | ||
124 | "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%%edi), %%edi\n\t" | |
/* edi now holds last_coeff_flag_offset_8x8[position]; adding the context
 * base and last_off forms the context address for the second decode. */
018c3383 MR |
125 | "add %6, %%"REG_D" \n\t"
126 | "add %8, %%"REG_D" \n\t" | |
52cb7981 | 127 |
018c3383 MR |
128 | BRANCHLESS_GET_CABAC("%%edx", "%4", "(%%"REG_D")", "%3",
129 | "%w3", "%%esi", "%%eax", "%%al", "%a11") | |
/* Store the current position through index; stop at 4 if bit 0 of edx is
 * set, otherwise advance the index pointer by one int. */
52cb7981 JD |
130 |
131 | "mov %2, %%"REG_a" \n\t" | |
132 | "mov %1, %%edi \n\t" | |
133 | "movl %%edi, (%%"REG_a") \n\t" | |
134 | ||
135 | "test $1, %%edx \n\t" | |
136 | " jnz 4f \n\t" | |
137 | ||
138 | "add $4, %%"REG_a" \n\t" | |
139 | "mov %%"REG_a", %2 \n\t" | |
140 | ||
141 | "3: \n\t" | |
142 | "addl $1, %%edi \n\t" | |
143 | "mov %%edi, %1 \n\t" | |
144 | "cmpl $63, %%edi \n\t" | |
145 | " jb 2b \n\t" | |
146 | "mov %2, %%"REG_a" \n\t" | |
147 | "movl %%edi, (%%"REG_a") \n\t" | |
148 | "4: \n\t" | |
/* eax holds the address of the last written entry; adding minusindex
 * (4 - index) and shifting right by 2 turns it into an entry count. */
018c3383 | 149 | "addl %5, %%eax \n\t" |
52cb7981 JD |
150 | "shr $2, %%eax \n\t"
151 | ||
/* Write the cached range (esi) and low (%3) back into the CABAC context. */
018c3383 MR |
152 | "movl %%esi, %a9(%4) \n\t"
153 | "movl %3, %a10(%4) \n\t" | |
154 | :"=&a"(coeff_count),"+m"(last), "+m"(index), "=&r"(low) | |
6b712acc MR |
155 | :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off), "m"(last_off),
156 | "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)), | |
157 | "i"(offsetof(CABACContext, bytestream)) | |
018c3383 | 158 | : "%"REG_c, "%edx", "%esi", "%"REG_D, "memory" |
52cb7981 JD |
159 | );
160 | return coeff_count; | |
161 | } | |
018c3383 | 162 | #endif /* ARCH_X86 && HAVE_7REGS && !defined(BROKEN_RELOCATIONS) */ |
52cb7981 | 163 | |
a6493a8f | 164 | #endif /* AVCODEC_X86_H264_I386_H */ |