FFmpeg source listing: libavcodec/x86/h264_qpel.c — x86 SIMD dispatch for H.264
quarter-pel (qpel) luma motion compensation.
1 /*
2  * Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
3  * Copyright (c) 2011 Daniel Kang
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include <stddef.h>
23 
24 #include "libavutil/attributes.h"
25 #include "libavutil/avassert.h"
26 #include "libavutil/cpu.h"
27 #include "libavutil/mem_internal.h"
28 #include "libavutil/x86/cpu.h"
29 #include "libavcodec/h264qpel.h"
30 #include "fpel.h"
31 
32 #if HAVE_X86ASM
/*
 * Prototypes for the hand-written x86 assembly copy/average primitives.
 * The "_l2" variants merge (average) two sources into dst.
 */
void ff_avg_pixels4_mmxext(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
                              ptrdiff_t stride);
void ff_avg_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
                              ptrdiff_t stride);
void ff_put_pixels8_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
                              ptrdiff_t dstStride, ptrdiff_t src1Stride, int h);
void ff_avg_pixels8_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
                              ptrdiff_t dstStride, ptrdiff_t src1Stride);
void ff_put_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
                               ptrdiff_t dstStride, ptrdiff_t src1Stride, int h);
void ff_avg_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
                               ptrdiff_t dstStride, ptrdiff_t src1Stride);
/*
 * The 4-pixel l2 asm routines take only a single stride (and no height);
 * these macros adapt them to the 6-argument call shape used by the
 * H264_MC_* template macros below, discarding the extra arguments.
 */
#define ff_put_pixels4_l2_mmxext(dst, src1, src2, dststride, src1stride, h) \
    ff_put_pixels4_l2_mmxext((dst), (src1), (src2), (dststride))
#define ff_avg_pixels4_l2_mmxext(dst, src1, src2, dststride, src1stride, h) \
    ff_avg_pixels4_l2_mmxext((dst), (src1), (src2), (dststride))
/* There are no dedicated SSE2 l2 kernels; reuse the MMXEXT ones under the
 * _sse2 names that the template macros expect (dropping args not taken). */
#define ff_put_pixels8_l2_sse2 ff_put_pixels8_l2_mmxext
#define ff_avg_pixels8_l2_sse2(dst, src1, src2, dststride, src1stride, h) \
    ff_avg_pixels8_l2_mmxext((dst), (src1), (src2), (dststride), (src1stride))
#define ff_put_pixels16_l2_sse2 ff_put_pixels16_l2_mmxext
#define ff_avg_pixels16_l2_sse2(dst, src1, src2, dststride, src1stride, h) \
    ff_avg_pixels16_l2_mmxext((dst), (src1), (src2), (dststride), (src1stride))
56 
/*
 * Declare, for one op (put/avg), the assembly lowpass kernels:
 * horizontal (_h_), vertical (_v_), horizontal+vertical (_hv*_), their
 * l2-averaging variants, and the shift5 merge helpers used by mc12/mc32.
 */
#define DEF_QPEL(OPNAME)\
void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride);\
void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_ssse3(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride);\
void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_l2_mmxext(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride);\
void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_sse2(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride);\
void ff_ ## OPNAME ## _h264_qpel16_h_lowpass_l2_sse2(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride);\
void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride);\
void ff_ ## OPNAME ## _h264_qpel4_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride);\
void ff_ ## OPNAME ## _h264_qpel8or16_v_lowpass_sse2(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h);\
void ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_h_mmxext(int16_t *tmp, uint8_t *dst, ptrdiff_t dstStride);\
void ff_ ## OPNAME ## _h264_qpel8or16_hv1_lowpass_op_sse2(const uint8_t *src, int16_t *tmp, ptrdiff_t srcStride, int size);\
void ff_ ## OPNAME ## _h264_qpel8_hv2_lowpass_sse2(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride);\
void ff_ ## OPNAME ## _h264_qpel16_hv2_lowpass_sse2(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride);\
void ff_ ## OPNAME ## _h264_qpel8_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride);\
void ff_ ## OPNAME ## _h264_qpel16_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride);\
void ff_ ## OPNAME ## _pixels4_l2_shift5_mmxext(uint8_t *dst, const int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride);\
void ff_ ## OPNAME ## _pixels8_l2_shift5_sse2(uint8_t *dst, const int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride);\
void ff_ ## OPNAME ## _pixels16_l2_shift5_sse2(uint8_t *dst, const int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride);\

/* Vertical first pass of the 4x4 hv filter; the put/avg op only applies to
 * the second (horizontal) pass, so a single put version is shared. */
void ff_put_h264_qpel4_hv_lowpass_v_mmxext(const uint8_t *src, int16_t *tmp, ptrdiff_t srcStride);

DEF_QPEL(avg)
DEF_QPEL(put)
80 
/*
 * 4x4 hv lowpass: run the shared vertical 6-tap pass into the 16-bit tmp
 * buffer, then the OPNAME'd horizontal pass from tmp into dst. src is
 * stepped back two rows and two columns to give the filter its context.
 */
#define QPEL_H264(OPNAME, MMX)\
static av_always_inline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    src -= 2*srcStride+2;\
    ff_put_h264_qpel4_hv_lowpass_v_mmxext(src, tmp, srcStride);\
    ff_ ## OPNAME ## h264_qpel4_hv_lowpass_h_mmxext(tmp, dst, dstStride);\
}\

/*
 * Build a 16-wide horizontal lowpass+l2 from four 8-wide asm calls
 * (two side by side, then 8 rows down). NOTE: src advances by dstStride —
 * every caller in this file passes a src stride equal to dstStride.
 */
#define QPEL_H264_H16(OPNAME, EXT) \
static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_l2_ ## EXT(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)\
{\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## EXT(dst  , src  , src2  , dstStride, src2Stride);\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## EXT(dst+8, src+8, src2+8, dstStride, src2Stride);\
    src += 8*dstStride;\
    dst += 8*dstStride;\
    src2 += 8*src2Stride;\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## EXT(dst  , src  , src2  , dstStride, src2Stride);\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## EXT(dst+8, src+8, src2+8, dstStride, src2Stride);\
}\
100 
101 
#if ARCH_X86_64
/* On x86-64 the 16-wide ssse3 l2 kernels exist directly in assembly, so the
 * C tiling wrapper is unnecessary: the macro expands to nothing and the asm
 * symbols are declared instead. */
#define QPEL_H264_H16_XMM(OPNAME, MMX)\

void ff_avg_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride);
void ff_put_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride);

#else // ARCH_X86_64
#define QPEL_H264_H16_XMM(OPNAME, EXT) QPEL_H264_H16(OPNAME, EXT)
#endif // ARCH_X86_64
111 
/*
 * 16-wide horizontal lowpass assembled from four 8-wide asm calls
 * (2x2 tiling), plus the 16-wide l2 variant via QPEL_H264_H16_XMM.
 */
#define QPEL_H264_H_XMM(OPNAME, MMX)\
QPEL_H264_H16_XMM(OPNAME, MMX)\
static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst  , src  , dstStride, srcStride);\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst  , src  , dstStride, srcStride);\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
}\

/*
 * Vertical lowpass wrappers around the height-parameterized "8or16" asm
 * kernel (XMM2 selects which asm flavor does the actual work).
 */
#define QPEL_H264_V_XMM(OPNAME, XMM, XMM2)\
static av_always_inline void ff_ ## OPNAME ## h264_qpel8_v_lowpass_ ## XMM(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## XMM2(dst  , src  , dstStride, srcStride,  8);\
}\
static av_always_inline void ff_ ## OPNAME ## h264_qpel16_v_lowpass_ ## XMM(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## XMM2(dst  , src  , dstStride, srcStride, 16);\
    ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## XMM2(dst+8, src+8, dstStride, srcStride, 16);\
}
134 
135 static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp,
136  const uint8_t *src,
137  ptrdiff_t srcStride,
138  int size)
139 {
140  int w = (size+8)>>3;
141  src -= 2*srcStride+2;
142  while(w--){
143  ff_put_h264_qpel8or16_hv1_lowpass_op_sse2(src, tmp, srcStride, size);
144  tmp += 8;
145  src += 8;
146  }
147 }
148 
/*
 * hv lowpass at 8 and 16: shared put-only SSE2 vertical pass into tmp, then
 * the OPNAME'd second (horizontal) pass from tmp into dst.
 */
#define QPEL_H264_HV_XMM(OPNAME, MMX)\
static av_always_inline void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    put_h264_qpel8or16_hv1_lowpass_sse2(tmp, src, srcStride, 8);\
    ff_ ## OPNAME ## h264_qpel8_hv2_lowpass_ ## MMX(dst, tmp, dstStride);\
}\
static av_always_inline void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    put_h264_qpel8or16_hv1_lowpass_sse2(tmp, src, srcStride, 16);\
    ff_ ## OPNAME ## h264_qpel16_hv2_lowpass_ ## MMX(dst, tmp, dstStride);\
}\
160 
/* Emit all V, H and HV quarter-pel positions for one op/size/cpu combo. */
#define H264_MC_V_H_HV(OPNAME, SIZE, MMX, ALIGN, SHIFT5_EXT) \
H264_MC_V(OPNAME, SIZE, MMX, ALIGN, SHIFT5_EXT)\
H264_MC_H(OPNAME, SIZE, MMX, ALIGN, SHIFT5_EXT)\
H264_MC_HV(OPNAME, SIZE, MMX, ALIGN, SHIFT5_EXT)\

/*
 * Horizontal-only positions (x = 1..3, y = 0).
 * mc10/mc30 average the half-pel filter output with the nearest full pel
 * (src resp. src+1) via the l2 kernels; mc20 is the pure half-pel filter.
 */
#define H264_MC_H(OPNAME, SIZE, MMX, ALIGN, UNUSED) \
static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## MMX(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src+1, stride, stride);\
}\
/*
 * Vertical-only positions (x = 0, y = 1..3).
 * mc01/mc03 filter vertically into an aligned SIZExSIZE scratch buffer and
 * then l2-average it with the nearest full-pel row (src resp. src+stride);
 * mc02 is the pure vertical half-pel filter straight into dst.
 */
#define H264_MC_V(OPNAME, SIZE, MMX, ALIGN, UNUSED) \
static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
    ff_ ## OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, temp, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## MMX(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
    ff_ ## OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src+stride, temp, stride, stride, SIZE);\
}\
/*
 * Diagonal / center positions.
 * mc11/mc31/mc13/mc33: vertical half-pel into a scratch buffer, then
 *   horizontal lowpass l2-averaged with it (offsets select the corner).
 * mc22: full hv lowpass.
 * mc21/mc23: hv result l2-averaged with a horizontal lowpass.
 * mc12/mc32: hv result merged with the 16-bit vertical intermediate via the
 *   shift5 kernels (halfV+2 / halfV+3 select the column).
 * Scratch sizing SIZE*(SIZE<8?12:24) is the int16 intermediate of the hv
 * first pass; ALIGN guarantees the alignment the asm expects.
 */
#define H264_MC_HV(OPNAME, SIZE, MMX, ALIGN, SHIFT5_EXT) \
static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint16_t, temp, [SIZE*(SIZE<8?12:24)]);\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(dst, temp, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
    uint8_t * const halfHV= temp;\
    int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
    av_assert2(((uintptr_t)temp & 7) == 0);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
    uint8_t * const halfHV= temp;\
    int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
    av_assert2(((uintptr_t)temp & 7) == 0);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
    uint8_t * const halfHV= temp;\
    int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
    av_assert2(((uintptr_t)temp & 7) == 0);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, stride);\
    ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_ ## SHIFT5_EXT(dst, halfV+2, halfHV, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
    uint8_t * const halfHV= temp;\
    int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
    av_assert2(((uintptr_t)temp & 7) == 0);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, stride);\
    ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_ ## SHIFT5_EXT(dst, halfV+3, halfHV, stride);\
}\
/* Instantiate a template for both put and avg. */
#define H264_MC(QPEL, SIZE, MMX, ALIGN, SHIFT5_EXT)\
QPEL(put_, SIZE, MMX, ALIGN, SHIFT5_EXT) \
QPEL(avg_, SIZE, MMX, ALIGN, SHIFT5_EXT) \

/* Instantiate put/avg at sizes 8 and 16 with 16-byte scratch alignment. */
#define H264_MC_816(QPEL, XMM, SHIFT5_EXT)\
QPEL(put_, 8, XMM, 16, SHIFT5_EXT)\
QPEL(put_, 16,XMM, 16, SHIFT5_EXT)\
QPEL(avg_, 8, XMM, 16, SHIFT5_EXT)\
QPEL(avg_, 16,XMM, 16, SHIFT5_EXT)\

/* Expand the wrapper and dispatch functions for every supported cpu level.
 * Note the ssse3 vertical/hv variants still use the sse2 asm kernels. */
QPEL_H264(put_, mmxext)
QPEL_H264(avg_, mmxext)
QPEL_H264_V_XMM(put_, sse2, sse2)
QPEL_H264_V_XMM(avg_, sse2, sse2)
QPEL_H264_HV_XMM(put_, sse2)
QPEL_H264_HV_XMM(avg_, sse2)
QPEL_H264_H_XMM(put_, ssse3)
QPEL_H264_H_XMM(avg_, ssse3)
QPEL_H264_V_XMM(put_, ssse3, sse2)
QPEL_H264_HV_XMM(put_, ssse3)
QPEL_H264_HV_XMM(avg_, ssse3)

H264_MC(H264_MC_V_H_HV, 4, mmxext, 8, mmxext)
H264_MC_816(H264_MC_V, sse2, sse2)
H264_MC_816(H264_MC_HV, sse2, sse2)
H264_MC_816(H264_MC_H, ssse3, sse2)
H264_MC_816(H264_MC_HV, ssse3, sse2)
304 
305 
//10bit
/* Prototype one 10-bit assembly MC function for quarter-pel position TYPE. */
#define LUMA_MC_OP(OP, NUM, DEPTH, TYPE, OPT) \
void ff_ ## OP ## _h264_qpel ## NUM ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT \
    (uint8_t *dst, const uint8_t *src, ptrdiff_t stride);

/* put + avg prototypes at block size 4. */
#define LUMA_MC_4(DEPTH, TYPE, OPT) \
    LUMA_MC_OP(put, 4, DEPTH, TYPE, OPT) \
    LUMA_MC_OP(avg, 4, DEPTH, TYPE, OPT)

/* put + avg prototypes at block sizes 8 and 16. */
#define LUMA_MC_816(DEPTH, TYPE, OPT) \
    LUMA_MC_OP(put, 8, DEPTH, TYPE, OPT) \
    LUMA_MC_OP(avg, 8, DEPTH, TYPE, OPT) \
    LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \
    LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT)

/* All sixteen quarter-pel positions exist as dedicated 10-bit asm. */
LUMA_MC_4(10, mc00, mmxext)
LUMA_MC_4(10, mc10, mmxext)
LUMA_MC_4(10, mc20, mmxext)
LUMA_MC_4(10, mc30, mmxext)
LUMA_MC_4(10, mc01, mmxext)
LUMA_MC_4(10, mc11, mmxext)
LUMA_MC_4(10, mc21, mmxext)
LUMA_MC_4(10, mc31, mmxext)
LUMA_MC_4(10, mc02, mmxext)
LUMA_MC_4(10, mc12, mmxext)
LUMA_MC_4(10, mc22, mmxext)
LUMA_MC_4(10, mc32, mmxext)
LUMA_MC_4(10, mc03, mmxext)
LUMA_MC_4(10, mc13, mmxext)
LUMA_MC_4(10, mc23, mmxext)
LUMA_MC_4(10, mc33, mmxext)

/* The horizontal positions additionally have ssse3_cache64 versions. */
LUMA_MC_816(10, mc00, sse2)
LUMA_MC_816(10, mc10, sse2)
LUMA_MC_816(10, mc10, ssse3_cache64)
LUMA_MC_816(10, mc20, sse2)
LUMA_MC_816(10, mc20, ssse3_cache64)
LUMA_MC_816(10, mc30, sse2)
LUMA_MC_816(10, mc30, ssse3_cache64)
LUMA_MC_816(10, mc01, sse2)
LUMA_MC_816(10, mc11, sse2)
LUMA_MC_816(10, mc21, sse2)
LUMA_MC_816(10, mc31, sse2)
LUMA_MC_816(10, mc02, sse2)
LUMA_MC_816(10, mc12, sse2)
LUMA_MC_816(10, mc22, sse2)
LUMA_MC_816(10, mc32, sse2)
LUMA_MC_816(10, mc03, sse2)
LUMA_MC_816(10, mc13, sse2)
LUMA_MC_816(10, mc23, sse2)
LUMA_MC_816(10, mc33, sse2)
357 
358 #endif /* HAVE_X86ASM */
359 
/* Fill tab[IDX][1..15] — every quarter-pel position except full-pel mc00 —
 * for one size/cpu combination. Table index is x + 4*y. */
#define SET_QPEL_FUNCS_1PP(PFX, IDX, SIZE, CPU, PREFIX) \
    do { \
        c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \
    } while (0)
/* Same as above, plus the full-pel copy at index 0. */
#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \
    do { \
        c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
        SET_QPEL_FUNCS_1PP(PFX, IDX, SIZE, CPU, PREFIX); \
    } while (0)

/* Set put+avg at sizes 16 (tab row 0) and 8 (row 1) for one (x,y) position,
 * 8-bit static wrapper functions. */
#define H264_QPEL_FUNCS(x, y, CPU) \
    do { \
        c->put_h264_qpel_pixels_tab[0][x + y * 4] = put_h264_qpel16_mc ## x ## y ## _ ## CPU; \
        c->put_h264_qpel_pixels_tab[1][x + y * 4] = put_h264_qpel8_mc ## x ## y ## _ ## CPU; \
        c->avg_h264_qpel_pixels_tab[0][x + y * 4] = avg_h264_qpel16_mc ## x ## y ## _ ## CPU; \
        c->avg_h264_qpel_pixels_tab[1][x + y * 4] = avg_h264_qpel8_mc ## x ## y ## _ ## CPU; \
    } while (0)

/* Same, but pointing at the ff_-prefixed 10-bit assembly functions. */
#define H264_QPEL_FUNCS_10(x, y, CPU) \
    do { \
        c->put_h264_qpel_pixels_tab[0][x + y * 4] = ff_put_h264_qpel16_mc ## x ## y ## _10_ ## CPU; \
        c->put_h264_qpel_pixels_tab[1][x + y * 4] = ff_put_h264_qpel8_mc ## x ## y ## _10_ ## CPU; \
        c->avg_h264_qpel_pixels_tab[0][x + y * 4] = ff_avg_h264_qpel16_mc ## x ## y ## _10_ ## CPU; \
        c->avg_h264_qpel_pixels_tab[1][x + y * 4] = ff_avg_h264_qpel8_mc ## x ## y ## _10_ ## CPU; \
    } while (0)
399 
401 {
402 #if HAVE_X86ASM
403  int high_bit_depth = bit_depth > 8;
404  int cpu_flags = av_get_cpu_flags();
405 
406  if (EXTERNAL_MMXEXT(cpu_flags)) {
407  if (!high_bit_depth) {
408  SET_QPEL_FUNCS_1PP(put_h264_qpel, 2, 4, mmxext, );
409  c->avg_h264_qpel_pixels_tab[1][0] = ff_avg_pixels8x8_mmxext;
410  SET_QPEL_FUNCS_1PP(avg_h264_qpel, 2, 4, mmxext, );
411  c->avg_h264_qpel_pixels_tab[2][0] = ff_avg_pixels4_mmxext;
412  } else if (bit_depth == 10) {
413  SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_);
414  SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 10_mmxext, ff_);
415  }
416  }
417 
418  if (EXTERNAL_SSE2(cpu_flags)) {
419  if (!high_bit_depth) {
420  H264_QPEL_FUNCS(0, 1, sse2);
421  H264_QPEL_FUNCS(0, 2, sse2);
422  H264_QPEL_FUNCS(0, 3, sse2);
423  H264_QPEL_FUNCS(1, 1, sse2);
424  H264_QPEL_FUNCS(1, 2, sse2);
425  H264_QPEL_FUNCS(1, 3, sse2);
426  H264_QPEL_FUNCS(2, 1, sse2);
427  H264_QPEL_FUNCS(2, 2, sse2);
428  H264_QPEL_FUNCS(2, 3, sse2);
429  H264_QPEL_FUNCS(3, 1, sse2);
430  H264_QPEL_FUNCS(3, 2, sse2);
431  H264_QPEL_FUNCS(3, 3, sse2);
432  c->put_h264_qpel_pixels_tab[0][0] = ff_put_pixels16x16_sse2;
433  c->avg_h264_qpel_pixels_tab[0][0] = ff_avg_pixels16x16_sse2;
434  }
435 
436  if (bit_depth == 10) {
437  SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_);
438  SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_);
439  SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_);
440  SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_sse2, ff_);
441  H264_QPEL_FUNCS_10(1, 0, sse2);
442  H264_QPEL_FUNCS_10(2, 0, sse2);
443  H264_QPEL_FUNCS_10(3, 0, sse2);
444  }
445  }
446 
447  if (EXTERNAL_SSSE3(cpu_flags)) {
448  if (!high_bit_depth) {
449  H264_QPEL_FUNCS(1, 0, ssse3);
450  H264_QPEL_FUNCS(1, 1, ssse3);
451  H264_QPEL_FUNCS(1, 2, ssse3);
452  H264_QPEL_FUNCS(1, 3, ssse3);
453  H264_QPEL_FUNCS(2, 0, ssse3);
454  H264_QPEL_FUNCS(2, 1, ssse3);
455  H264_QPEL_FUNCS(2, 2, ssse3);
456  H264_QPEL_FUNCS(2, 3, ssse3);
457  H264_QPEL_FUNCS(3, 0, ssse3);
458  H264_QPEL_FUNCS(3, 1, ssse3);
459  H264_QPEL_FUNCS(3, 2, ssse3);
460  H264_QPEL_FUNCS(3, 3, ssse3);
461  }
462 
463  if (bit_depth == 10) {
464  H264_QPEL_FUNCS_10(1, 0, ssse3_cache64);
465  H264_QPEL_FUNCS_10(2, 0, ssse3_cache64);
466  H264_QPEL_FUNCS_10(3, 0, ssse3_cache64);
467  }
468  }
469 #endif
470 }
ff_avg_pixels8_l2_mmxext
void ff_avg_pixels8_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src1Stride)
cpu.h
mem_internal.h
src1
const pixel * src1
Definition: h264pred_template.c:420
w
uint8_t w
Definition: llviddspenc.c:38
ff_avg_pixels16x16_sse2
void ff_avg_pixels16x16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size)
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:109
bit_depth
static void bit_depth(AudioStatsContext *s, const uint64_t *const mask, uint8_t *depth)
Definition: af_astats.c:246
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:56
h264qpel.h
ff_h264qpel_init_x86
av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
Definition: h264_qpel.c:400
avassert.h
av_cold
#define av_cold
Definition: attributes.h:106
H264_QPEL_FUNCS
#define H264_QPEL_FUNCS(x, y, CPU)
Definition: h264_qpel.c:384
tmp
static uint8_t tmp[40]
Definition: aes_ctr.c:52
SET_QPEL_FUNCS
#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX)
Definition: h264_qpel.c:378
ff_put_pixels16x16_sse2
void ff_put_pixels16x16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size)
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:87
cpu.h
size
int size
Definition: twinvq_data.h:10344
avg
#define avg(a, b, c, d)
Definition: colorspacedsp_template.c:28
SET_QPEL_FUNCS_1PP
#define SET_QPEL_FUNCS_1PP(PFX, IDX, SIZE, CPU, PREFIX)
Definition: h264_qpel.c:360
attributes.h
EXTERNAL_SSE2
#define EXTERNAL_SSE2(flags)
Definition: cpu.h:59
src2
const pixel * src2
Definition: h264pred_template.c:421
av_always_inline
#define av_always_inline
Definition: attributes.h:63
H264QpelContext
Definition: h264qpel.h:27
stride
#define stride
Definition: h264pred_template.c:536
ff_avg_pixels8x8_mmxext
void ff_avg_pixels8x8_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size)
H264_MC
#define H264_MC(OPNAME, NAME, SIZE)
Definition: h264qpel_template.c:307
ff_put_pixels8_l2_mmxext
void ff_put_pixels8_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src1Stride, int h)
ff_avg_pixels16_l2_mmxext
void ff_avg_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src1Stride)
fpel.h
ff_put_pixels16_l2_mmxext
void ff_put_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src1Stride, int h)
h
h
Definition: vp9dsp_template.c:2070
EXTERNAL_SSSE3
#define EXTERNAL_SSSE3(flags)
Definition: cpu.h:65
H264_QPEL_FUNCS_10
#define H264_QPEL_FUNCS_10(x, y, CPU)
Definition: h264_qpel.c:392
EXTERNAL_MMXEXT
#define EXTERNAL_MMXEXT(flags)
Definition: cpu.h:57
src
#define src
Definition: vp8dsp.c:248