/*
 * Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
 * Copyright (c) 2011 Daniel Kang
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <stddef.h>

#include "libavutil/attributes.h"
#include "libavutil/avassert.h"
#include "libavutil/cpu.h"
#include "libavutil/mem_internal.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/h264qpel.h"
#include "fpel.h"
#include "qpel.h"

#if HAVE_X86ASM
void ff_avg_pixels4_mmxext(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_pixels4x4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
                                ptrdiff_t stride);
void ff_avg_pixels4x4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
                                ptrdiff_t stride);
#define ff_put_pixels4x4_l2_mmxext(dst, src1, src2, dststride, src1stride) \
    ff_put_pixels4x4_l2_mmxext((dst), (src1), (src2), (dststride))
#define ff_avg_pixels4x4_l2_mmxext(dst, src1, src2, dststride, src1stride) \
    ff_avg_pixels4x4_l2_mmxext((dst), (src1), (src2), (dststride))
#define ff_put_pixels8x8_l2_sse2 ff_put_pixels8x8_l2_mmxext
#define ff_avg_pixels8x8_l2_sse2 ff_avg_pixels8x8_l2_mmxext
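/* The two wrapper macros above adapt the 4x4 l2 assembly, which takes a single
 * stride, to the size-generic templates below that pass separate dst and src1
 * strides; the extra src1stride argument is simply dropped, which appears safe
 * because every call site passes the same value for both. The 8x8 l2 "sse2"
 * names are plain aliases for the MMXEXT implementations. */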

#define DEF_QPEL(OPNAME)\
void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride);\
void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_ssse3(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride);\
void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_l2_mmxext(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride);\
void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_sse2(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride);\
void ff_ ## OPNAME ## _h264_qpel16_h_lowpass_l2_sse2(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride);\
void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride);\
void ff_ ## OPNAME ## _h264_qpel4_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride);\
void ff_ ## OPNAME ## _h264_qpel8or16_v_lowpass_sse2(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h);\
void ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_h_mmxext(int16_t *tmp, uint8_t *dst, ptrdiff_t dstStride);\
void ff_ ## OPNAME ## _h264_qpel8or16_hv1_lowpass_op_sse2(const uint8_t *src, int16_t *tmp, ptrdiff_t srcStride, int size);\
void ff_ ## OPNAME ## _h264_qpel8_hv2_lowpass_sse2(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride);\
void ff_ ## OPNAME ## _h264_qpel16_hv2_lowpass_sse2(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride);\
void ff_ ## OPNAME ## _h264_qpel8_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride);\
void ff_ ## OPNAME ## _h264_qpel16_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride);\
void ff_ ## OPNAME ## _pixels4_l2_shift5_mmxext(uint8_t *dst, const int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride);\
void ff_ ## OPNAME ## _pixels8_l2_shift5_sse2(uint8_t *dst, const int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride);\
void ff_ ## OPNAME ## _pixels16_l2_shift5_sse2(uint8_t *dst, const int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride);\

void ff_put_h264_qpel4_hv_lowpass_v_mmxext(const uint8_t *src, int16_t *tmp, ptrdiff_t srcStride);

DEF_QPEL(avg)
DEF_QPEL(put)

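/* 4x4 HV (centre) interpolation for MMXEXT is split into two assembly passes:
 * a vertical filter from src into the 16-bit tmp buffer, then a horizontal
 * pass (put or avg, depending on OPNAME) from tmp into dst. src is moved up
 * and left by two samples so the six-tap filter has its full context. */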
#define QPEL_H264(OPNAME, MMX)\
static av_always_inline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    src -= 2*srcStride+2;\
    ff_put_h264_qpel4_hv_lowpass_v_mmxext(src, tmp, srcStride);\
    ff_ ## OPNAME ## h264_qpel4_hv_lowpass_h_mmxext(tmp, dst, dstStride);\
}\

#define QPEL_H264_H16(OPNAME, EXT) \
static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_l2_ ## EXT(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)\
{\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## EXT(dst  , src  , src2  , dstStride, src2Stride);\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## EXT(dst+8, src+8, src2+8, dstStride, src2Stride);\
    src  += 8*dstStride;\
    dst  += 8*dstStride;\
    src2 += 8*src2Stride;\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## EXT(dst  , src  , src2  , dstStride, src2Stride);\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## EXT(dst+8, src+8, src2+8, dstStride, src2Stride);\
}\


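/* On x86-64 a 16-wide SSSE3 h_lowpass_l2 exists directly in assembly, so
 * QPEL_H264_H16_XMM expands to nothing and only the prototypes are needed;
 * on x86-32 the 16-wide variant is instead synthesized from four 8-wide calls
 * via QPEL_H264_H16. */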
#if ARCH_X86_64
#define QPEL_H264_H16_XMM(OPNAME, MMX)\

void ff_avg_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride);
void ff_put_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride);

#else // ARCH_X86_64
#define QPEL_H264_H16_XMM(OPNAME, EXT) QPEL_H264_H16(OPNAME, EXT)
#endif // ARCH_X86_64

#define QPEL_H264_H_XMM(OPNAME, MMX)\
QPEL_H264_H16_XMM(OPNAME, MMX)\
static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst  , src  , dstStride, srcStride);\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst  , src  , dstStride, srcStride);\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
}\

#define QPEL_H264_V_XMM(OPNAME, XMM, XMM2)\
static av_always_inline void ff_ ## OPNAME ## h264_qpel8_v_lowpass_ ## XMM(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## XMM2(dst  , src  , dstStride, srcStride, 8);\
}\
static av_always_inline void ff_ ## OPNAME ## h264_qpel16_v_lowpass_ ## XMM(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## XMM2(dst  , src  , dstStride, srcStride, 16);\
    ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## XMM2(dst+8, src+8, dstStride, srcStride, 16);\
}

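/* First (vertical) pass of the SSE2 HV filter: walks the block in 8-column
 * strips ((size+8)>>3 of them, covering the extra columns the later horizontal
 * pass needs), writing 16-bit intermediates into tmp; src is offset by
 * -2*srcStride-2 so each output sample has its filter context available. */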
static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp,
                                                                 const uint8_t *src,
                                                                 ptrdiff_t srcStride,
                                                                 int size)
{
    int w = (size+8)>>3;
    src -= 2*srcStride+2;
    while(w--){
        ff_put_h264_qpel8or16_hv1_lowpass_op_sse2(src, tmp, srcStride, size);
        tmp += 8;
        src += 8;
    }
}

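/* Full 8x8/16x16 HV interpolation: the shared vertical first pass above fills
 * tmp, then the per-op hv2 second pass filters horizontally from tmp into dst. */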
#define QPEL_H264_HV_XMM(OPNAME, MMX)\
static av_always_inline void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    put_h264_qpel8or16_hv1_lowpass_sse2(tmp, src, srcStride, 8);\
    ff_ ## OPNAME ## h264_qpel8_hv2_lowpass_ ## MMX(dst, tmp, dstStride);\
}\
static av_always_inline void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    put_h264_qpel8or16_hv1_lowpass_sse2(tmp, src, srcStride, 16);\
    ff_ ## OPNAME ## h264_qpel16_hv2_lowpass_ ## MMX(dst, tmp, dstStride);\
}\

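/* The mcXY functions generated below implement the 15 sub-pel positions of
 * H.264 luma motion compensation, with X the horizontal and Y the vertical
 * quarter-pel offset. Half-pel offsets (2) come straight from the six-tap
 * lowpass filters; quarter-pel offsets (1, 3) average a half-pel plane with
 * the nearest integer samples or with another half-pel plane, via the *_l2
 * and *_l2_shift5 helpers. */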
#define H264_MC_V_H_HV(OPNAME, SIZE, MMX, ALIGN, SHIFT5_EXT) \
H264_MC_V(OPNAME, SIZE, MMX, ALIGN, SHIFT5_EXT)\
H264_MC_H(OPNAME, SIZE, MMX, ALIGN, SHIFT5_EXT)\
H264_MC_HV(OPNAME, SIZE, MMX, ALIGN, SHIFT5_EXT)\

#define H264_MC_H(OPNAME, SIZE, MMX, ALIGN, UNUSED) \
static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## MMX(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src+1, stride, stride);\
}\

#define H264_MC_V(OPNAME, SIZE, MMX, ALIGN, UNUSED) \
static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
    ff_ ## OPNAME ## pixels ## SIZE ## x ## SIZE ## _l2_ ## MMX(dst, src, temp, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## MMX(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
    ff_ ## OPNAME ## pixels ## SIZE ## x ## SIZE ## _l2_ ## MMX(dst, src+stride, temp, stride, stride);\
}\

#define H264_MC_HV(OPNAME, SIZE, MMX, ALIGN, SHIFT5_EXT) \
static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint16_t, temp, [SIZE*(SIZE<8?12:24)]);\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(dst, temp, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
    uint8_t * const halfHV= temp;\
    int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
    av_assert2(((uintptr_t)temp & 7) == 0);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
    uint8_t * const halfHV= temp;\
    int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
    av_assert2(((uintptr_t)temp & 7) == 0);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
    uint8_t * const halfHV= temp;\
    int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
    av_assert2(((uintptr_t)temp & 7) == 0);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, stride);\
    ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_ ## SHIFT5_EXT(dst, halfV+2, halfHV, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
    uint8_t * const halfHV= temp;\
    int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
    av_assert2(((uintptr_t)temp & 7) == 0);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, stride);\
    ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_ ## SHIFT5_EXT(dst, halfV+3, halfHV, stride);\
}\

#define H264_MC(QPEL, SIZE, MMX, ALIGN, SHIFT5_EXT)\
QPEL(put_, SIZE, MMX, ALIGN, SHIFT5_EXT) \
QPEL(avg_, SIZE, MMX, ALIGN, SHIFT5_EXT) \

#define H264_MC_816(QPEL, XMM, SHIFT5_EXT)\
QPEL(put_, 8, XMM, 16, SHIFT5_EXT)\
QPEL(put_, 16, XMM, 16, SHIFT5_EXT)\
QPEL(avg_, 8, XMM, 16, SHIFT5_EXT)\
QPEL(avg_, 16, XMM, 16, SHIFT5_EXT)\

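/* Instantiate the templates for each SIMD level. The SSSE3 variants reuse the
 * SSE2 vertical kernel (hence QPEL_H264_V_XMM(put_, ssse3, sse2)) and the SSE2
 * HV first pass; only the horizontal and second-pass (hv2) filters have
 * dedicated SSSE3 assembly. */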
QPEL_H264(put_, mmxext)
QPEL_H264(avg_, mmxext)
QPEL_H264_V_XMM(put_, sse2, sse2)
QPEL_H264_V_XMM(avg_, sse2, sse2)
QPEL_H264_HV_XMM(put_, sse2)
QPEL_H264_HV_XMM(avg_, sse2)
QPEL_H264_H_XMM(put_, ssse3)
QPEL_H264_H_XMM(avg_, ssse3)
QPEL_H264_V_XMM(put_, ssse3, sse2)
QPEL_H264_HV_XMM(put_, ssse3)
QPEL_H264_HV_XMM(avg_, ssse3)

H264_MC(H264_MC_V_H_HV, 4, mmxext, 8, mmxext)
H264_MC_816(H264_MC_V, sse2, sse2)
H264_MC_816(H264_MC_HV, sse2, sse2)
H264_MC_816(H264_MC_H, ssse3, sse2)
H264_MC_816(H264_MC_HV, ssse3, sse2)


//10bit
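/* For 10-bit content every sub-pel position is implemented entirely in
 * assembly; the LUMA_MC_* macros below merely declare the prototypes that the
 * init code assigns into the function tables. */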
#define LUMA_MC_OP(OP, NUM, DEPTH, TYPE, OPT) \
void ff_ ## OP ## _h264_qpel ## NUM ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT \
    (uint8_t *dst, const uint8_t *src, ptrdiff_t stride);

#define LUMA_MC_4(DEPTH, TYPE, OPT) \
    LUMA_MC_OP(put, 4, DEPTH, TYPE, OPT) \
    LUMA_MC_OP(avg, 4, DEPTH, TYPE, OPT)

#define LUMA_MC_816(DEPTH, TYPE, OPT) \
    LUMA_MC_OP(put, 8, DEPTH, TYPE, OPT) \
    LUMA_MC_OP(avg, 8, DEPTH, TYPE, OPT) \
    LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \
    LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT)

LUMA_MC_4(10, mc00, mmxext)
LUMA_MC_4(10, mc10, mmxext)
LUMA_MC_4(10, mc20, mmxext)
LUMA_MC_4(10, mc30, mmxext)
LUMA_MC_4(10, mc01, mmxext)
LUMA_MC_4(10, mc11, mmxext)
LUMA_MC_4(10, mc21, mmxext)
LUMA_MC_4(10, mc31, mmxext)
LUMA_MC_4(10, mc02, mmxext)
LUMA_MC_4(10, mc12, mmxext)
LUMA_MC_4(10, mc22, mmxext)
LUMA_MC_4(10, mc32, mmxext)
LUMA_MC_4(10, mc03, mmxext)
LUMA_MC_4(10, mc13, mmxext)
LUMA_MC_4(10, mc23, mmxext)
LUMA_MC_4(10, mc33, mmxext)

LUMA_MC_816(10, mc00, sse2)
LUMA_MC_816(10, mc10, sse2)
LUMA_MC_816(10, mc10, ssse3_cache64)
LUMA_MC_816(10, mc20, sse2)
LUMA_MC_816(10, mc20, ssse3_cache64)
LUMA_MC_816(10, mc30, sse2)
LUMA_MC_816(10, mc30, ssse3_cache64)
LUMA_MC_816(10, mc01, sse2)
LUMA_MC_816(10, mc11, sse2)
LUMA_MC_816(10, mc21, sse2)
LUMA_MC_816(10, mc31, sse2)
LUMA_MC_816(10, mc02, sse2)
LUMA_MC_816(10, mc12, sse2)
LUMA_MC_816(10, mc22, sse2)
LUMA_MC_816(10, mc32, sse2)
LUMA_MC_816(10, mc03, sse2)
LUMA_MC_816(10, mc13, sse2)
LUMA_MC_816(10, mc23, sse2)
LUMA_MC_816(10, mc33, sse2)

#endif /* HAVE_X86ASM */

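/* Helpers for filling the 16-entry *_pixels_tab arrays, indexed by x + 4*y for
 * quarter-pel position (x, y). SET_QPEL_FUNCS_1PP fills only the 15 sub-pel
 * entries; the full-pel mc00 slot is either set separately from the fpel
 * copy/average routines or left as previously initialised. H264_QPEL_FUNCS and
 * H264_QPEL_FUNCS_10 overwrite a single position for both put and avg at sizes
 * 16 and 8. */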
#define SET_QPEL_FUNCS_1PP(PFX, IDX, SIZE, CPU, PREFIX) \
    do { \
        c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \
    } while (0)
#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \
    do { \
        c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
        SET_QPEL_FUNCS_1PP(PFX, IDX, SIZE, CPU, PREFIX); \
    } while (0)

#define H264_QPEL_FUNCS(x, y, CPU) \
    do { \
        c->put_h264_qpel_pixels_tab[0][x + y * 4] = put_h264_qpel16_mc ## x ## y ## _ ## CPU; \
        c->put_h264_qpel_pixels_tab[1][x + y * 4] = put_h264_qpel8_mc ## x ## y ## _ ## CPU; \
        c->avg_h264_qpel_pixels_tab[0][x + y * 4] = avg_h264_qpel16_mc ## x ## y ## _ ## CPU; \
        c->avg_h264_qpel_pixels_tab[1][x + y * 4] = avg_h264_qpel8_mc ## x ## y ## _ ## CPU; \
    } while (0)

#define H264_QPEL_FUNCS_10(x, y, CPU) \
    do { \
        c->put_h264_qpel_pixels_tab[0][x + y * 4] = ff_put_h264_qpel16_mc ## x ## y ## _10_ ## CPU; \
        c->put_h264_qpel_pixels_tab[1][x + y * 4] = ff_put_h264_qpel8_mc ## x ## y ## _10_ ## CPU; \
        c->avg_h264_qpel_pixels_tab[0][x + y * 4] = ff_avg_h264_qpel16_mc ## x ## y ## _10_ ## CPU; \
        c->avg_h264_qpel_pixels_tab[1][x + y * 4] = ff_avg_h264_qpel8_mc ## x ## y ## _10_ ## CPU; \
    } while (0)

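/* Runtime initialisation, invoked from the generic H.264 qpel init: replaces
 * the default function pointers with the fastest x86 versions available for
 * the detected CPU flags and the requested bit depth. */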
av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
{
#if HAVE_X86ASM
    int high_bit_depth = bit_depth > 8;
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        if (!high_bit_depth) {
            SET_QPEL_FUNCS_1PP(put_h264_qpel, 2, 4, mmxext, );
            c->avg_h264_qpel_pixels_tab[1][0] = ff_avg_pixels8x8_mmxext;
            SET_QPEL_FUNCS_1PP(avg_h264_qpel, 2, 4, mmxext, );
            c->avg_h264_qpel_pixels_tab[2][0] = ff_avg_pixels4_mmxext;
        } else if (bit_depth == 10) {
            SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_);
            SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 10_mmxext, ff_);
        }
    }

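    /* For 8-bit, SSE2 provides every position with a vertical filter component
     * (y = 1..3, any x) plus the full-pel copy/average; the SSSE3 block below
     * then overrides all positions with a horizontal component (x = 1..3), so
     * on SSSE3-capable CPUs only the purely vertical column keeps SSE2 code. */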
    if (EXTERNAL_SSE2(cpu_flags)) {
        if (!high_bit_depth) {
            H264_QPEL_FUNCS(0, 1, sse2);
            H264_QPEL_FUNCS(0, 2, sse2);
            H264_QPEL_FUNCS(0, 3, sse2);
            H264_QPEL_FUNCS(1, 1, sse2);
            H264_QPEL_FUNCS(1, 2, sse2);
            H264_QPEL_FUNCS(1, 3, sse2);
            H264_QPEL_FUNCS(2, 1, sse2);
            H264_QPEL_FUNCS(2, 2, sse2);
            H264_QPEL_FUNCS(2, 3, sse2);
            H264_QPEL_FUNCS(3, 1, sse2);
            H264_QPEL_FUNCS(3, 2, sse2);
            H264_QPEL_FUNCS(3, 3, sse2);
            c->put_h264_qpel_pixels_tab[0][0] = ff_put_pixels16x16_sse2;
            c->avg_h264_qpel_pixels_tab[0][0] = ff_avg_pixels16x16_sse2;
        }

        if (bit_depth == 10) {
            SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_);
            SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_);
            SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_);
            SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_sse2, ff_);
            H264_QPEL_FUNCS_10(1, 0, sse2);
            H264_QPEL_FUNCS_10(2, 0, sse2);
            H264_QPEL_FUNCS_10(3, 0, sse2);
        }
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        if (!high_bit_depth) {
            H264_QPEL_FUNCS(1, 0, ssse3);
            H264_QPEL_FUNCS(1, 1, ssse3);
            H264_QPEL_FUNCS(1, 2, ssse3);
            H264_QPEL_FUNCS(1, 3, ssse3);
            H264_QPEL_FUNCS(2, 0, ssse3);
            H264_QPEL_FUNCS(2, 1, ssse3);
            H264_QPEL_FUNCS(2, 2, ssse3);
            H264_QPEL_FUNCS(2, 3, ssse3);
            H264_QPEL_FUNCS(3, 0, ssse3);
            H264_QPEL_FUNCS(3, 1, ssse3);
            H264_QPEL_FUNCS(3, 2, ssse3);
            H264_QPEL_FUNCS(3, 3, ssse3);
        }

        if (bit_depth == 10) {
            H264_QPEL_FUNCS_10(1, 0, ssse3_cache64);
            H264_QPEL_FUNCS_10(2, 0, ssse3_cache64);
            H264_QPEL_FUNCS_10(3, 0, ssse3_cache64);
        }
    }
#endif
}