FFmpeg
h264_cabac.c
Go to the documentation of this file.
1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * H.264 / AVC / MPEG-4 part10 codec.
25  * non-SIMD x86-specific optimizations for H.264
26  * @author Michael Niedermayer <michaelni@gmx.at>
27  */
28 
29 #include <stddef.h>
30 
31 #include "libavcodec/cabac.h"
32 #include "cabac.h"
33 
34 #if HAVE_INLINE_ASM
35 
/*
 * REG64 is the inline-asm constraint letter used below (in
 * decode_significance_8x8_x86) for the pointer-sized operands last, index,
 * sig_off and last_coeff_ctx_base: a general register ("r") on x86-64,
 * a memory operand ("m") on every other build.
 * NOTE(review): presumably "m" is chosen on 32-bit x86 because there are not
 * enough free registers for all operands -- confirm.
 */
36 #if ARCH_X86_64
37 #define REG64 "r"
38 #else
39 #define REG64 "m"
40 #endif
41 
42 //FIXME use some macros to avoid duplicating get_cabac (cannot be done yet
43 //as that would make optimization work hard)
44 #if HAVE_7REGS && !BROKEN_COMPILER
/* Route the generic name decode_significance to the inline-asm version below. */
45 #define decode_significance decode_significance_x86
/**
 * Inline-asm scan that fills index[] with coefficient positions.
 *
 * The context pointer starts at significant_coeff_ctx_base and is advanced
 * one byte per iteration while it is below
 * significant_coeff_ctx_base + max_coeff - 1. For each position:
 *   1. BRANCHLESS_GET_CABAC is invoked on the byte at the context pointer.
 *      The macro is defined outside this file; presumably it decodes one
 *      CABAC bin into `bit` and updates the context byte -- confirm in cabac.h.
 *   2. If bit 0 of `bit` is clear, the position is skipped.
 *   3. Otherwise last_off is added to the context pointer, the macro is
 *      invoked a second time, last_off is subtracted again, and the current
 *      position (context pointer minus the original base, 32 bit) is stored
 *      at *index.
 *   4. If bit 0 of the second result is set, the scan stops; otherwise the
 *      index pointer is advanced by 4 bytes and the scan continues.
 * If the loop runs to the end, the final position (max_coeff - 1) is stored
 * at *index without invoking the macro for it.
 *
 * Operand map of the main asm statement:
 *   %0 coeff_count (scratch, then result)   %1 significant_coeff_ctx_base
 *   %2 index (memory)                       %3 c->low
 *   %4 bit                                  %5 c->range
 *   %6 c                                    %7 minusstart
 *   %8 end                                  %9 minusindex
 *   %10 last_off                            %11 offsetof(bytestream)
 *   %12 offsetof(bytestream_end)            %13 supplied by TABLES_ARG
 *                                               (defined elsewhere -- confirm)
 *
 * NOTE(review): the line numbers embedded in this listing skip 61, 71-73 and
 * 83-85, so some source lines (likely further macro arguments and an input
 * constraint) are not visible here -- compare against the upstream file
 * before changing anything in the asm statements.
 *
 * @param c                          CABAC context; c->low and c->range are
 *                                   read and written back by the asm
 * @param max_coeff                  determines the end of the scan, see above
 * @param significant_coeff_ctx_base first context byte of the scan
 * @param index                      output array receiving 32-bit positions
 * @param last_off                   byte offset applied to the context
 *                                   pointer for the second macro invocation
 * @return number of entries written to index[], computed as
 *         (final index pointer + 4 - original index pointer) >> 2 on the
 *         low 32 bits
 */
46 static int decode_significance_x86(CABACContext *c, int max_coeff,
47  uint8_t *significant_coeff_ctx_base,
48  int *index, x86_reg last_off){
49  void *end= significant_coeff_ctx_base + max_coeff - 1;
/* Negated addresses, truncated to int: adding the live pointer later yields
 * a 32-bit difference (a position, respectively a byte count). */
50  int minusstart= -(intptr_t)significant_coeff_ctx_base;
51  int minusindex= 4-(intptr_t)index;
52  int bit;
53  x86_reg coeff_count;
54 
55 #ifdef BROKEN_RELOCATIONS
56  void *tables;
57 
/* Load the address of ff_h264_cabac_tables into a register up front. */
58  __asm__ volatile(
59  "lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t"
60  : "=&r"(tables)
62  );
63 #endif
64 
65  __asm__ volatile(
/* 3: top of the per-position loop */
66  "3: \n\t"
67 
/* first macro invocation, on the byte at the context pointer (%1) */
68  BRANCHLESS_GET_CABAC("%4", "%q4", "(%1)", "%3", "%w3",
69  "%5", "%q5", "%k0", "%b0",
70  "%c11(%6)", "%c12(%6)",
74  "%13")
75 
/* bit 0 clear: nothing to record for this position */
76  "test $1, %4 \n\t"
77  " jz 4f \n\t"
/* temporarily move the context pointer by last_off */
78  "add %10, %1 \n\t"
79 
/* second macro invocation, on the byte at context pointer + last_off */
80  BRANCHLESS_GET_CABAC("%4", "%q4", "(%1)", "%3", "%w3",
81  "%5", "%q5", "%k0", "%b0",
82  "%c11(%6)", "%c12(%6)",
86  "%13")
87 
/* restore the context pointer, then *index = context pointer - base */
88  "sub %10, %1 \n\t"
89  "mov %2, %0 \n\t"
90  "movl %7, %%ecx \n\t"
91  "add %1, %%"FF_REG_c" \n\t"
92  "movl %%ecx, (%0) \n\t"
93 
/* bit 0 of the second result set: stop scanning */
94  "test $1, %4 \n\t"
95  " jnz 5f \n\t"
96 
/* advance index to the next int slot */
97  "add"FF_OPSIZE" $4, %2 \n\t"
98 
/* 4: step to the next context byte and loop while below end */
99  "4: \n\t"
100  "add $1, %1 \n\t"
101  "cmp %8, %1 \n\t"
102  " jb 3b \n\t"
/* loop ran out: store the final position without invoking the macro */
103  "mov %2, %0 \n\t"
104  "movl %7, %%ecx \n\t"
105  "add %1, %%"FF_REG_c" \n\t"
106  "movl %%ecx, (%0) \n\t"
/* 5: %0 holds the index pointer; turn it into a count of written entries */
107  "5: \n\t"
108  "add %9, %k0 \n\t"
109  "shr $2, %k0 \n\t"
110  : "=&q"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index),
111  "+&r"(c->low), "=&r"(bit), "+&r"(c->range)
112  : "r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off),
113  "i"(offsetof(CABACContext, bytestream)),
114  "i"(offsetof(CABACContext, bytestream_end))
115  TABLES_ARG
116  : "%"FF_REG_c, "memory"
117  );
118  return coeff_count;
119 }
120 
/* Route the generic name decode_significance_8x8 to the inline-asm version below. */
121 #define decode_significance_8x8 decode_significance_8x8_x86
/**
 * Inline-asm scan that fills index[] with coefficient positions, using
 * per-position lookup tables to select the context bytes.
 *
 * The position counter `last` starts at 0 and the loop repeats while the
 * incremented counter is below 63. For each position:
 *   1. The context byte at significant_coeff_ctx_base + sig_off[last] is
 *      passed to BRANCHLESS_GET_CABAC. The macro is defined outside this
 *      file; presumably it decodes one CABAC bin into `bit` and updates the
 *      context byte -- confirm in cabac.h.
 *   2. If bit 0 of `bit` is clear, the position is skipped.
 *   3. Otherwise a byte is looked up in ff_h264_cabac_tables at constant
 *      offset %c14 plus `last`, added to last_coeff_ctx_base, and the macro
 *      is invoked on that context byte; then `last` is stored at *index.
 *   4. If bit 0 of the second result is set, the scan stops; otherwise the
 *      index pointer is advanced by 4 bytes and the scan continues.
 * If the loop runs to the end, the final counter value (63) is stored at
 * *index without invoking the macro for it.
 *
 * Operand map of the main asm statement:
 *   %0 coeff_count (scratch, then result)   %1 last
 *   %2 index                                %3 c->low
 *   %4 bit                                  %5 c->range
 *   %6 state (scratch)                      %7 c
 *   %8 minusindex                           %9 significant_coeff_ctx_base
 *   %10 sig_off                             %11 last_coeff_ctx_base
 *   %12 offsetof(bytestream)                %13 offsetof(bytestream_end)
 *   %14, %15 are referenced by the asm text but their constraints are not
 *   visible in this listing.
 *
 * NOTE(review): the line numbers embedded in this listing skip 137, 152-154,
 * 171-173 and 201, so some source lines (likely further macro arguments and
 * the constraints for %14/%15) are not visible here -- compare against the
 * upstream file before changing anything in the asm statements.
 *
 * @param c                          CABAC context; c->low and c->range are
 *                                   read and written back by the asm
 * @param significant_coeff_ctx_base base added to sig_off[last] to form the
 *                                   first context address
 * @param index                      output array receiving 32-bit positions
 * @param last_coeff_ctx_base        base added to the table byte to form the
 *                                   second context address
 * @param sig_off                    byte table indexed by position
 * @return number of entries written to index[], computed as
 *         (final index pointer + 4 - original index pointer) >> 2 on the
 *         low 32 bits
 */
122 static int decode_significance_8x8_x86(CABACContext *c,
123  uint8_t *significant_coeff_ctx_base,
124  int *index, uint8_t *last_coeff_ctx_base, const uint8_t *sig_off){
/* Negated address, truncated to int: adding the final index pointer later
 * yields the number of bytes written (including the last entry). */
125  int minusindex= 4-(intptr_t)index;
126  int bit;
127  x86_reg coeff_count;
128  x86_reg last=0;
129  x86_reg state;
130 
131 #ifdef BROKEN_RELOCATIONS
132  void *tables;
133 
/* Load the address of ff_h264_cabac_tables into a register up front. */
134  __asm__ volatile(
135  "lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t"
136  : "=&r"(tables)
138  );
139 #endif
140 
141  __asm__ volatile(
/* state = last (the current position) */
142  "mov %1, %6 \n\t"
/* 3: top of the per-position loop */
143  "3: \n\t"
144 
/* state = significant_coeff_ctx_base + sig_off[position] */
145  "mov %10, %0 \n\t"
146  "movzb (%0, %6), %6 \n\t"
147  "add %9, %6 \n\t"
148 
/* first macro invocation, on the byte addressed by state */
149  BRANCHLESS_GET_CABAC("%4", "%q4", "(%6)", "%3", "%w3",
150  "%5", "%q5", "%k0", "%b0",
151  "%c12(%7)", "%c13(%7)",
155  "%15")
156 
/* reload the position; bit 0 clear means nothing to record here */
157  "mov %1, %6 \n\t"
158  "test $1, %4 \n\t"
159  " jz 4f \n\t"
160 
/* state = last_coeff_ctx_base + table byte for this position */
161 #ifdef BROKEN_RELOCATIONS
162  "movzb %c14(%15, %q6), %6\n\t"
163 #else
164  "movzb "MANGLE(ff_h264_cabac_tables)"+%c14(%6), %6\n\t"
165 #endif
166  "add %11, %6 \n\t"
167 
/* second macro invocation, on the byte addressed by state */
168  BRANCHLESS_GET_CABAC("%4", "%q4", "(%6)", "%3", "%w3",
169  "%5", "%q5", "%k0", "%b0",
170  "%c12(%7)", "%c13(%7)",
174  "%15")
175 
/* *index = position (low 32 bits) */
176  "mov %2, %0 \n\t"
177  "mov %1, %6 \n\t"
178  "mov %k6, (%0) \n\t"
179 
/* bit 0 of the second result set: stop scanning */
180  "test $1, %4 \n\t"
181  " jnz 5f \n\t"
182 
/* advance index to the next int slot */
183  "add"FF_OPSIZE" $4, %2 \n\t"
184 
/* 4: increment the position, write it back to last, loop while below 63 */
185  "4: \n\t"
186  "add $1, %6 \n\t"
187  "mov %6, %1 \n\t"
188  "cmp $63, %6 \n\t"
189  " jb 3b \n\t"
/* loop ran out: store the final position without invoking the macro */
190  "mov %2, %0 \n\t"
191  "mov %k6, (%0) \n\t"
/* 5: %0 holds the index pointer; turn it into a count of written entries */
192  "5: \n\t"
193  "addl %8, %k0 \n\t"
194  "shr $2, %k0 \n\t"
195  : "=&q"(coeff_count), "+"REG64(last), "+"REG64(index), "+&r"(c->low),
196  "=&r"(bit), "+&r"(c->range), "=&r"(state)
197  : "r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base),
198  REG64(sig_off), REG64(last_coeff_ctx_base),
199  "i"(offsetof(CABACContext, bytestream)),
200  "i"(offsetof(CABACContext, bytestream_end)),
202  : "%"FF_REG_c, "memory"
203  );
204  return coeff_count;
205 }
206 #endif /* HAVE_7REGS && !BROKEN_COMPILER */
207 
208 #endif /* HAVE_INLINE_ASM */
cabac.h
bit
#define bit(string, value)
Definition: cbs_mpeg2.c:56
NAMED_CONSTRAINTS_ARRAY
#define NAMED_CONSTRAINTS_ARRAY(...)
Definition: asm.h:151
tables
Writing a table generator This documentation is preliminary Parts of the API are not good and should be changed Basic concepts A table generator consists of two *_tablegen c and *_tablegen h The h file will provide the variable declarations and initialization code for the tables
Definition: tablegen.txt:10
ff_h264_cabac_tables
const uint8_t ff_h264_cabac_tables[512+4 *2 *64+4 *64+63]
index
int index
Definition: gxfenc.c:89
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
H264_LPS_RANGE_OFFSET
#define H264_LPS_RANGE_OFFSET
Definition: cabac.h:34
AV_STRINGIFY
#define AV_STRINGIFY(s)
Definition: macros.h:66
H264_NORM_SHIFT_OFFSET
#define H264_NORM_SHIFT_OFFSET
Definition: cabac.h:33
__asm__
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET
#define H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET
Definition: cabac.h:36
state
static struct @362 state
MANGLE
#define MANGLE(a)
Definition: asm.h:127
x86_reg
int x86_reg
Definition: asm.h:72
H264_MLPS_STATE_OFFSET
#define H264_MLPS_STATE_OFFSET
Definition: cabac.h:35
CABACContext
Definition: cabac.h:41