Commit | Line | Data |
---|---|---|
52cb7981 JD |
1 | /* |
2 | * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder | |
3 | * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> | |
4 | * | |
5 | * This file is part of FFmpeg. | |
6 | * | |
7 | * FFmpeg is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU Lesser General Public | |
9 | * License as published by the Free Software Foundation; either | |
10 | * version 2.1 of the License, or (at your option) any later version. | |
11 | * | |
12 | * FFmpeg is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Lesser General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Lesser General Public | |
18 | * License along with FFmpeg; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 | */ | |
21 | ||
22 | /** | |
23 | * @file h264_i386.h | |
24 | * H.264 / AVC / MPEG4 part10 codec. | |
25 | * non-MMX i386-specific optimizations for H.264 | |
26 | * @author Michael Niedermayer <michaelni@gmx.at> | |
27 | */ | |
28 | ||
29 | #ifndef FFMPEG_H264_I386_H | |
30 | #define FFMPEG_H264_I386_H | |
31 | ||
32 | ||
33 | #include "cabac.h" | |
34 | ||
35 | //FIXME use some macros to avoid duplicating get_cabac (cannot be done yet as that would make optimization work hard) | |
36 | #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS) | |
/**
 * Scans a run of context bytes with hand-written x86 inline assembly and
 * records the positions at which BRANCHLESS_GET_CABAC returns a set bit.
 *
 * For each context pointer p from significant_coeff_ctx_base up to (but not
 * including) significant_coeff_ctx_base + max_coeff - 1:
 *  - BRANCHLESS_GET_CABAC is invoked on the byte at p; if bit 0 of the result
 *    is clear the loop simply advances to p + 1.
 *  - otherwise BRANCHLESS_GET_CABAC is invoked again on the byte at p + 61,
 *    the offset (p - significant_coeff_ctx_base) is stored through the
 *    current index pointer, and either the scan stops (bit 0 set) or the
 *    index pointer advances by one int (bit 0 clear).
 * If the loop runs to its bound, the final offset is stored as one more entry.
 *
 * NOTE(review): BRANCHLESS_GET_CABAC, RANGE, LOW, REG_a and REG_c are defined
 * outside this view; the second call presumably decodes the
 * last-significant-coefficient flag - confirm against cabac.h.
 * NOTE(review): the (int) casts of pointers keep only the low 32 bits; the
 * assembly only consumes 32-bit results derived from them - confirm this is
 * intended on 64-bit builds.
 *
 * @param c the values at displacements RANGE and LOW from this pointer are
 *          loaded into esi/ebx on entry and written back on exit
 * @param max_coeff sets the loop bound (see end below)
 * @param significant_coeff_ctx_base first context byte examined
 * @param index array receiving the recorded offsets
 * @return number of ints written to index
 */
37 | static int decode_significance_x86(CABACContext *c, int max_coeff, uint8_t *significant_coeff_ctx_base, int *index){ | |
38 | void *end= significant_coeff_ctx_base + max_coeff - 1; /* loop bound compared against the running context pointer */ | |
39 | int minusstart= -(int)significant_coeff_ctx_base; /* added to the running pointer to get its offset from the base */ | |
40 | int minusindex= 4-(int)index; /* added to the final index pointer so that the shift by 2 below yields the entry count */ | |
41 | int coeff_count; | |
42 | asm volatile( | |
43 | "movl "RANGE "(%3), %%esi \n\t" /* esi = value at displacement RANGE from c */ | |
44 | "movl "LOW "(%3), %%ebx \n\t" /* ebx = value at displacement LOW from c */ | |
45 | ||
46 | "2: \n\t" /* loop head */ | |
47 | ||
48 | BRANCHLESS_GET_CABAC("%%edx", "%3", "(%1)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al") /* result in edx, state byte at the running pointer */ | |
49 | ||
50 | "test $1, %%edx \n\t" | |
51 | " jz 3f \n\t" /* bit 0 clear: nothing to record at this position */ | |
52 | ||
53 | BRANCHLESS_GET_CABAC("%%edx", "%3", "61(%1)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al") /* second call, state byte 61 past the running pointer */ | |
54 | ||
55 | "mov %2, %%"REG_a" \n\t" /* a = current index pointer */ | |
56 | "movl %4, %%ecx \n\t" /* ecx = minusstart */ | |
57 | "add %1, %%"REG_c" \n\t" /* c = running pointer - base */ | |
58 | "movl %%ecx, (%%"REG_a") \n\t" /* record the offset through the index pointer */ | |
59 | ||
60 | "test $1, %%edx \n\t" | |
61 | " jnz 4f \n\t" /* bit 0 set on the second call: stop scanning */ | |
62 | ||
63 | "add $4, %%"REG_a" \n\t" /* advance the index pointer by one int */ | |
64 | "mov %%"REG_a", %2 \n\t" | |
65 | ||
66 | "3: \n\t" | |
67 | "add $1, %1 \n\t" /* next context byte */ | |
68 | "cmp %5, %1 \n\t" | |
69 | " jb 2b \n\t" /* unsigned compare against end */ | |
70 | "mov %2, %%"REG_a" \n\t" /* loop ran to its bound: record the final offset as well */ | |
71 | "movl %4, %%ecx \n\t" | |
72 | "add %1, %%"REG_c" \n\t" | |
73 | "movl %%ecx, (%%"REG_a") \n\t" | |
74 | "4: \n\t" /* eax holds the index pointer of the last entry written */ | |
75 | "add %6, %%eax \n\t" /* eax = last entry pointer - index + 4 */ | |
76 | "shr $2, %%eax \n\t" /* bytes to ints: number of entries written */ | |
77 | ||
78 | "movl %%esi, "RANGE "(%3) \n\t" /* write esi back to displacement RANGE from c */ | |
79 | "movl %%ebx, "LOW "(%3) \n\t" /* write ebx back to displacement LOW from c */ | |
80 | :"=&a"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index) | |
81 | :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex) | |
82 | : "%"REG_c, "%ebx", "%edx", "%esi", "memory" | |
83 | ); | |
84 | return coeff_count; | |
85 | } | |
86 | ||
/**
 * Variant of the significance scan that walks positions 0..62 and looks up
 * the context byte for each position through offset tables, implemented as
 * x86 inline assembly.
 *
 * For each position pos (kept in the local variable last):
 *  - BRANCHLESS_GET_CABAC is invoked on the byte at
 *    significant_coeff_ctx_base + sig_off[pos]; if bit 0 of the result is
 *    clear the loop advances to pos + 1.
 *  - otherwise BRANCHLESS_GET_CABAC is invoked again on the byte at
 *    significant_coeff_ctx_base + last_coeff_flag_offset_8x8[pos] + 15,
 *    pos is stored through the current index pointer, and either the scan
 *    stops (bit 0 set) or the index pointer advances by one int.
 * If the loop reaches position 63, that value is stored as one more entry.
 *
 * NOTE(review): BRANCHLESS_GET_CABAC, RANGE, LOW, MANGLE, REG_a, REG_D and
 * the table last_coeff_flag_offset_8x8 are defined outside this view -
 * confirm their semantics there.
 * NOTE(review): the (int) cast of index keeps only the low 32 bits; only a
 * 32-bit difference is computed from it - confirm this is intended on
 * 64-bit builds.
 *
 * @param c the values at displacements RANGE and LOW from this pointer are
 *          loaded into esi/ebx on entry and written back on exit
 * @param significant_coeff_ctx_base base address added to the table offsets
 * @param index array receiving the recorded positions
 * @param sig_off per-position byte offsets used for the first call
 * @return number of ints written to index
 */
87 | static int decode_significance_8x8_x86(CABACContext *c, uint8_t *significant_coeff_ctx_base, int *index, const uint8_t *sig_off){ | |
88 | int minusindex= 4-(int)index; /* added to the final index pointer so that the shift by 2 below yields the entry count */ | |
89 | int coeff_count; | |
90 | long last=0; /* current position, kept in memory as operand %1 */ | |
91 | asm volatile( | |
92 | "movl "RANGE "(%3), %%esi \n\t" /* esi = value at displacement RANGE from c */ | |
93 | "movl "LOW "(%3), %%ebx \n\t" /* ebx = value at displacement LOW from c */ | |
94 | ||
95 | "mov %1, %%"REG_D" \n\t" /* D = current position */ | |
96 | "2: \n\t" /* loop head */ | |
97 | ||
98 | "mov %6, %%"REG_a" \n\t" /* a = sig_off */ | |
99 | "movzbl (%%"REG_a", %%"REG_D"), %%edi \n\t" /* edi = sig_off[position] */ | |
100 | "add %5, %%"REG_D" \n\t" /* D = address of the context byte */ | |
101 | ||
102 | BRANCHLESS_GET_CABAC("%%edx", "%3", "(%%"REG_D")", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al") /* result in edx */ | |
103 | ||
104 | "mov %1, %%edi \n\t" /* reload the position, D was used as an address */ | |
105 | "test $1, %%edx \n\t" | |
106 | " jz 3f \n\t" /* bit 0 clear: nothing to record at this position */ | |
107 | ||
108 | "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%%edi), %%edi\n\t" /* edi = last_coeff_flag_offset_8x8[position] */ | |
109 | "add %5, %%"REG_D" \n\t" | |
110 | ||
111 | BRANCHLESS_GET_CABAC("%%edx", "%3", "15(%%"REG_D")", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al") /* second call, state byte 15 past the computed address */ | |
112 | ||
113 | "mov %2, %%"REG_a" \n\t" /* a = current index pointer */ | |
114 | "mov %1, %%edi \n\t" /* reload the position again */ | |
115 | "movl %%edi, (%%"REG_a") \n\t" /* record the position through the index pointer */ | |
116 | ||
117 | "test $1, %%edx \n\t" | |
118 | " jnz 4f \n\t" /* bit 0 set on the second call: stop scanning */ | |
119 | ||
120 | "add $4, %%"REG_a" \n\t" /* advance the index pointer by one int */ | |
121 | "mov %%"REG_a", %2 \n\t" | |
122 | ||
123 | "3: \n\t" | |
124 | "addl $1, %%edi \n\t" /* next position */ | |
125 | "mov %%edi, %1 \n\t" | |
126 | "cmpl $63, %%edi \n\t" | |
127 | " jb 2b \n\t" /* keep looping while the position is below 63 */ | |
128 | "mov %2, %%"REG_a" \n\t" /* loop ran to its bound: record position 63 as well */ | |
129 | "movl %%edi, (%%"REG_a") \n\t" | |
130 | "4: \n\t" /* eax holds the index pointer of the last entry written */ | |
131 | "addl %4, %%eax \n\t" /* eax = last entry pointer - index + 4 */ | |
132 | "shr $2, %%eax \n\t" /* bytes to ints: number of entries written */ | |
133 | ||
134 | "movl %%esi, "RANGE "(%3) \n\t" /* write esi back to displacement RANGE from c */ | |
135 | "movl %%ebx, "LOW "(%3) \n\t" /* write ebx back to displacement LOW from c */ | |
136 | :"=&a"(coeff_count),"+m"(last), "+m"(index) | |
137 | :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off) | |
138 | : "%"REG_c, "%ebx", "%edx", "%esi", "%"REG_D, "memory" | |
139 | ); | |
140 | return coeff_count; | |
141 | } | |
142 | #endif /* defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS) */ | |
143 | ||
144 | #endif /* FFMPEG_H264_I386_H */ |