Go to the documentation of this file.
/*
 * Token-pasting helpers used to build function names.
 *   bf(fn, bd, opt)  expands to fn_<bd>_<opt>
 *   BF(fn, bpc, opt) expands to fn_<bpc>bpc_<opt>
 */
35 #define bf(fn, bd, opt) fn##_##bd##_##opt
36 #define BF(fn, bpc, opt) fn##_##bpc##bpc_##opt
/*
 * Declares the prototypes of ff_vvc_avg_<bpc>bpc_<opt> and
 * ff_vvc_w_avg_<bpc>bpc_<opt>.
 * Both take dst/dst_stride, two int16_t sources, width, height and a final
 * pixel_max argument; the weighted variant additionally takes denom, w0, w1,
 * o0 and o1. Width, height, the weighting arguments and pixel_max are all
 * declared as intptr_t.
 */
38 #define AVG_BPC_PROTOTYPES(bpc, opt) \
39 void BF(ff_vvc_avg, bpc, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
40 const int16_t *src0, const int16_t *src1, intptr_t width, intptr_t height, intptr_t pixel_max); \
41 void BF(ff_vvc_w_avg, bpc, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
42 const int16_t *src0, const int16_t *src1, intptr_t width, intptr_t height, \
43 intptr_t denom, intptr_t w0, intptr_t w1, intptr_t o0, intptr_t o1, intptr_t pixel_max);
/* Emit the prototypes for the 8bpc and 16bpc avx2 variants. */
45 AVG_BPC_PROTOTYPES( 8, avx2)
46 AVG_BPC_PROTOTYPES(16, avx2)
/*
 * Declares the prototypes of ff_vvc_dmvr_<bd>_<opt>, ff_vvc_dmvr_h_<bd>_<opt>,
 * ff_vvc_dmvr_v_<bd>_<opt> and ff_vvc_dmvr_hv_<bd>_<opt>.
 * All four share one signature:
 *   (int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, int height,
 *    intptr_t mx, intptr_t my, int width)
 */
48 #define DMVR_PROTOTYPES(bd, opt) \
49 void ff_vvc_dmvr_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, \
50 int height, intptr_t mx, intptr_t my, int width); \
51 void ff_vvc_dmvr_h_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, \
52 int height, intptr_t mx, intptr_t my, int width); \
53 void ff_vvc_dmvr_v_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, \
54 int height, intptr_t mx, intptr_t my, int width); \
55 void ff_vvc_dmvr_hv_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, \
56 int height, intptr_t mx, intptr_t my, int width); \
62 #if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
/*
 * Prototype of the external AVX2 BDOF routine.
 * Arguments: destination and its stride, two int16_t sources, the block
 * size (w, h) and pixel_max. The OF_FUNC wrapper passes (1 << bd) - 1 as
 * pixel_max.
 */
63 void ff_vvc_apply_bdof_avx2(uint8_t *
dst, ptrdiff_t dst_stride,
64 const int16_t *
src0,
const int16_t *
src1,
65 int w,
int h,
int pixel_max);
/*
 * Defines the static wrapper vvc_apply_bdof_<bd>_<opt>, which forwards its
 * arguments to ff_vvc_apply_bdof_<opt> and appends (1 << bd) - 1 as the
 * last argument.
 * NOTE(review): this listing skips the numbered lines 70 and 72-77, where
 * the braces of the wrapper body presumably are -- confirm against the
 * original file.
 */
67 #define OF_FUNC(bd, opt) \
68 static void vvc_apply_bdof_##bd##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
69 const int16_t *src0, const int16_t *src1, int w, int h) \
71 ff_vvc_apply_bdof##_##opt(dst, dst_stride, src0, src1, w, h, (1 << bd) - 1); \
/* Stores the avx2 wrapper for bit depth bd in c->inter.apply_bdof. */
78 #define OF_INIT(bd) c->inter.apply_bdof = vvc_apply_bdof_##bd##_avx2
/*
 * Declares the prototypes of four ALF functions named
 * ff_vvc_alf_<x>_<bpc>bpc_<opt>:
 *   filter_luma / filter_chroma: dst, dst_stride, src, src_stride, width,
 *       height, filter, clip, stride, vb_pos, pixel_max
 *       (integer arguments declared as ptrdiff_t)
 *   classify_grad: gradient_sum, src, src_stride, width, height, vb_pos
 *   classify:      class_idx, transpose_idx, gradient_sum, width, height,
 *                  vb_pos, bit_depth
 *       (integer arguments declared as intptr_t)
 */
81 #define ALF_BPC_PROTOTYPES(bpc, opt) \
82 void BF(ff_vvc_alf_filter_luma, bpc, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
83 const uint8_t *src, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height, \
84 const int16_t *filter, const int16_t *clip, ptrdiff_t stride, ptrdiff_t vb_pos, ptrdiff_t pixel_max); \
85 void BF(ff_vvc_alf_filter_chroma, bpc, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
86 const uint8_t *src, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height, \
87 const int16_t *filter, const int16_t *clip, ptrdiff_t stride, ptrdiff_t vb_pos, ptrdiff_t pixel_max); \
88 void BF(ff_vvc_alf_classify_grad, bpc, opt)(int *gradient_sum, \
89 const uint8_t *src, ptrdiff_t src_stride, intptr_t width, intptr_t height, intptr_t vb_pos); \
90 void BF(ff_vvc_alf_classify, bpc, opt)(int *class_idx, int *transpose_idx, const int *gradient_sum, \
91 intptr_t width, intptr_t height, intptr_t vb_pos, intptr_t bit_depth); \
/* Emit the ALF prototypes for the 8bpc and 16bpc avx2 variants. */
93 ALF_BPC_PROTOTYPES(8, avx2)
94 ALF_BPC_PROTOTYPES(16, avx2)
/*
 * Defines the static forwarding wrapper vvc_put_<name>_<depth>_<opt>.
 * It calls ff_h2656_put_<name>_<depth>_<opt> with the same arguments and
 * inserts 2 * MAX_PB_SIZE as the second argument.
 * NOTE(review): 2 * MAX_PB_SIZE is presumably the destination stride
 * expected by the ff_h2656_put_* functions -- confirm against their
 * prototypes.
 * NOTE(review): the numbered lines 100 and 102 (presumably the body braces)
 * are absent from this listing.
 */
97 #define FW_PUT(name, depth, opt) \
98 static void vvc_put_ ## name ## _ ## depth ## _##opt(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, \
99 int height, const int8_t *hf, const int8_t *vf, int width) \
101 ff_h2656_put_## name ## _ ## depth ## _##opt(dst, 2 * MAX_PB_SIZE, src, srcstride, height, hf, vf, width); \
104 #if HAVE_SSE4_EXTERNAL
/*
 * Emits FW_PUT wrappers for fname with the suffixes 4, 8, 16, 32, 64 and
 * 128 appended.
 */
105 #define FW_PUT_TAP(fname, bitd, opt ) \
106 FW_PUT(fname##4, bitd, opt ) \
107 FW_PUT(fname##8, bitd, opt ) \
108 FW_PUT(fname##16, bitd, opt ) \
109 FW_PUT(fname##32, bitd, opt ) \
110 FW_PUT(fname##64, bitd, opt ) \
111 FW_PUT(fname##128, bitd, opt ) \
/* Same as FW_PUT_TAP, plus an extra wrapper for the suffix 2. */
113 #define FW_PUT_4TAP(fname, bitd, opt) \
114 FW_PUT(fname ## 2, bitd, opt) \
115 FW_PUT_TAP(fname, bitd, opt)
/*
 * sse4 wrappers that include the suffix-2 variant: pixels, 4tap_h, 4tap_v
 * and 4tap_hv.
 */
117 #define FW_PUT_4TAP_SSE4(bitd) \
118 FW_PUT_4TAP(pixels, bitd, sse4) \
119 FW_PUT_4TAP(4tap_h, bitd, sse4) \
120 FW_PUT_4TAP(4tap_v, bitd, sse4) \
121 FW_PUT_4TAP(4tap_hv, bitd, sse4)
/* sse4 wrappers for 8tap_h, 8tap_v and 8tap_hv (suffixes 4..128 only). */
123 #define FW_PUT_8TAP_SSE4(bitd) \
124 FW_PUT_TAP(8tap_h, bitd, sse4) \
125 FW_PUT_TAP(8tap_v, bitd, sse4) \
126 FW_PUT_TAP(8tap_hv, bitd, sse4)
/* All sse4 put wrappers for one bit depth: the 4-tap set, then the 8-tap set. */
128 #define FW_PUT_SSE4(bitd) \
129 FW_PUT_4TAP_SSE4(bitd) \
130 FW_PUT_8TAP_SSE4(bitd)
137 #if HAVE_AVX2_EXTERNAL
/*
 * avx2 wrappers for the <n>tap_h and <n>tap_v functions with the suffixes
 * 32, 64 and 128. No hv variants are emitted by this macro.
 */
138 #define FW_PUT_TAP_AVX2(n, bitd) \
139 FW_PUT(n ## tap_h32, bitd, avx2) \
140 FW_PUT(n ## tap_h64, bitd, avx2) \
141 FW_PUT(n ## tap_h128, bitd, avx2) \
142 FW_PUT(n ## tap_v32, bitd, avx2) \
143 FW_PUT(n ## tap_v64, bitd, avx2) \
144 FW_PUT(n ## tap_v128, bitd, avx2)
/*
 * All avx2 wrappers emitted for one bit depth: pixels32/64/128 plus the
 * 4-tap and 8-tap h/v sets from FW_PUT_TAP_AVX2.
 */
146 #define FW_PUT_AVX2(bitd) \
147 FW_PUT(pixels32, bitd, avx2) \
148 FW_PUT(pixels64, bitd, avx2) \
149 FW_PUT(pixels128, bitd, avx2) \
150 FW_PUT_TAP_AVX2(4, bitd) \
151 FW_PUT_TAP_AVX2(8, bitd) \
/*
 * Additional avx2 wrappers for <n>tap: h16, v16, and hv with the suffixes
 * 16, 32, 64 and 128.
 */
157 #define FW_PUT_TAP_16BPC_AVX2(n, bitd) \
158 FW_PUT(n ## tap_h16, bitd, avx2) \
159 FW_PUT(n ## tap_v16, bitd, avx2) \
160 FW_PUT(n ## tap_hv16, bitd, avx2) \
161 FW_PUT(n ## tap_hv32, bitd, avx2) \
162 FW_PUT(n ## tap_hv64, bitd, avx2) \
163 FW_PUT(n ## tap_hv128, bitd, avx2)
/* pixels16 plus the 4-tap and 8-tap sets from FW_PUT_TAP_16BPC_AVX2. */
165 #define FW_PUT_16BPC_AVX2(bitd) \
166 FW_PUT(pixels16, bitd, avx2) \
167 FW_PUT_TAP_16BPC_AVX2(4, bitd) \
168 FW_PUT_TAP_16BPC_AVX2(8, bitd)
/* Instantiated for bit depths 10 and 12 only. */
170 FW_PUT_16BPC_AVX2(10)
171 FW_PUT_16BPC_AVX2(12)
/*
 * Defines the static wrappers vvc_avg_<bd>_<opt> and vvc_w_avg_<bd>_<opt>.
 * They take plain int sizes and weights, and call ff_vvc_avg_<bpc>bpc_<opt>
 * and ff_vvc_w_avg_<bpc>bpc_<opt> with (1 << bd) - 1 appended as the final
 * argument (pixel_max in the prototypes).
 * NOTE(review): the numbered lines 176, 178, 182 and 185-186 (presumably
 * the body braces) are absent from this listing.
 */
173 #define AVG_FUNCS(bpc, bd, opt) \
174 static void bf(vvc_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
175 const int16_t *src0, const int16_t *src1, int width, int height) \
177 BF(ff_vvc_avg, bpc, opt)(dst, dst_stride, src0, src1, width, height, (1 << bd) - 1); \
179 static void bf(vvc_w_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
180 const int16_t *src0, const int16_t *src1, int width, int height, \
181 int denom, int w0, int w1, int o0, int o1) \
183 BF(ff_vvc_w_avg, bpc, opt)(dst, dst_stride, src0, src1, width, height, \
184 denom, w0, w1, o0, o1, (1 << bd) - 1); \
/* Bit depth 8 uses the 8bpc functions; bit depths 10 and 12 both use 16bpc. */
187 AVG_FUNCS(8, 8, avx2)
188 AVG_FUNCS(16, 10, avx2)
189 AVG_FUNCS(16, 12, avx2)
/*
 * Defines three static ALF wrappers for bit depth bd:
 *   vvc_alf_filter_luma_<bd>_<opt>:   computes
 *       param_stride = (width >> 2) * ALF_NUM_COEFF_LUMA and passes it as
 *       the "stride" argument of ff_vvc_alf_filter_luma_<bpc>bpc_<opt>.
 *   vvc_alf_filter_chroma_<bd>_<opt>: passes 0 as the "stride" argument.
 *   vvc_alf_classify_<bd>_<opt>:      calls the classify_grad function to
 *       fill gradient_tmp, then the classify function, which reads
 *       gradient_tmp and receives bd as its last argument.
 * Both filter wrappers append (1 << bd) - 1 as pixel_max.
 * NOTE(review): the numbered lines holding the body braces (194, 198, 201,
 * 204, 207, 210) are absent from this listing.
 */
191 #define ALF_FUNCS(bpc, bd, opt) \
192 static void bf(vvc_alf_filter_luma, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride, \
193 int width, int height, const int16_t *filter, const int16_t *clip, const int vb_pos) \
195 const int param_stride = (width >> 2) * ALF_NUM_COEFF_LUMA; \
196 BF(ff_vvc_alf_filter_luma, bpc, opt)(dst, dst_stride, src, src_stride, width, height, \
197 filter, clip, param_stride, vb_pos, (1 << bd) - 1); \
199 static void bf(vvc_alf_filter_chroma, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride, \
200 int width, int height, const int16_t *filter, const int16_t *clip, const int vb_pos) \
202 BF(ff_vvc_alf_filter_chroma, bpc, opt)(dst, dst_stride, src, src_stride, width, height, \
203 filter, clip, 0, vb_pos,(1 << bd) - 1); \
205 static void bf(vvc_alf_classify, bd, opt)(int *class_idx, int *transpose_idx, \
206 const uint8_t *src, ptrdiff_t src_stride, int width, int height, int vb_pos, int *gradient_tmp) \
208 BF(ff_vvc_alf_classify_grad, bpc, opt)(gradient_tmp, src, src_stride, width, height, vb_pos); \
209 BF(ff_vvc_alf_classify, bpc, opt)(class_idx, transpose_idx, gradient_tmp, width, height, vb_pos, bd); \
/* Bit depth 8 uses the 8bpc functions; bit depths 10 and 12 both use 16bpc. */
212 ALF_FUNCS(8, 8, avx2)
213 ALF_FUNCS(16, 10, avx2)
214 ALF_FUNCS(16, 12, avx2)
/*
 * Fills one slot in each of two function-pointer tables:
 *   dst[C][W][idx1][idx2]     = vvc_put_<name>_<D>_<opt>
 *   dst_uni[C][W][idx1][idx2] = ff_h2656_put_uni_<name>_<D>_<opt>
 * "_uni" is pasted onto the last token of the dst argument, so passing
 * c->inter.put addresses c->inter.put and c->inter.put_uni.
 */
218 #define PEL_LINK(dst, C, W, idx1, idx2, name, D, opt) \
219 dst[C][W][idx1][idx2] = vvc_put_## name ## _ ## D ## _##opt; \
220 dst ## _uni[C][W][idx1][idx2] = ff_h2656_put_uni_ ## name ## _ ## D ## _##opt; \
/*
 * Links the six variants of fname: table index W = 1..6 receives the
 * functions with the suffixes 4, 8, 16, 32, 64 and 128 respectively.
 */
222 #define MC_TAP_LINKS(pointer, C, my, mx, fname, bitd, opt ) \
223 PEL_LINK(pointer, C, 1, my , mx , fname##4 , bitd, opt ); \
224 PEL_LINK(pointer, C, 2, my , mx , fname##8 , bitd, opt ); \
225 PEL_LINK(pointer, C, 3, my , mx , fname##16, bitd, opt ); \
226 PEL_LINK(pointer, C, 4, my , mx , fname##32, bitd, opt ); \
227 PEL_LINK(pointer, C, 5, my , mx , fname##64, bitd, opt ); \
228 PEL_LINK(pointer, C, 6, my , mx , fname##128, bitd, opt );
/* MC_TAP_LINKS applied to the LUMA table entry. */
230 #define MC_8TAP_LINKS(pointer, my, mx, fname, bitd, opt) \
231 MC_TAP_LINKS(pointer, LUMA, my, mx, fname, bitd, opt)
/*
 * Links the sse4 LUMA functions into c->inter.put, indexed by (my, mx):
 * (0,0) pixels, (0,1) 8tap_h, (1,0) 8tap_v, (1,1) 8tap_hv.
 */
233 #define MC_8TAP_LINKS_SSE4(bd) \
234 MC_8TAP_LINKS(c->inter.put, 0, 0, pixels, bd, sse4); \
235 MC_8TAP_LINKS(c->inter.put, 0, 1, 8tap_h, bd, sse4); \
236 MC_8TAP_LINKS(c->inter.put, 1, 0, 8tap_v, bd, sse4); \
237 MC_8TAP_LINKS(c->inter.put, 1, 1, 8tap_hv, bd, sse4)
/*
 * CHROMA counterpart of MC_8TAP_LINKS: additionally links the suffix-2
 * function at table index W = 0.
 */
239 #define MC_4TAP_LINKS(pointer, my, mx, fname, bitd, opt) \
240 PEL_LINK(pointer, CHROMA, 0, my , mx , fname##2 , bitd, opt ); \
241 MC_TAP_LINKS(pointer, CHROMA, my, mx, fname, bitd, opt) \
/*
 * Links the sse4 CHROMA functions into c->inter.put, indexed by (my, mx):
 * (0,0) pixels, (0,1) 4tap_h, (1,0) 4tap_v, (1,1) 4tap_hv.
 */
243 #define MC_4TAP_LINKS_SSE4(bd) \
244 MC_4TAP_LINKS(c->inter.put, 0, 0, pixels, bd, sse4); \
245 MC_4TAP_LINKS(c->inter.put, 0, 1, 4tap_h, bd, sse4); \
246 MC_4TAP_LINKS(c->inter.put, 1, 0, 4tap_v, bd, sse4); \
247 MC_4TAP_LINKS(c->inter.put, 1, 1, 4tap_hv, bd, sse4)
/* Links all sse4 put functions for one bit depth: CHROMA first, then LUMA. */
249 #define MC_LINK_SSE4(bd) \
250 MC_4TAP_LINKS_SSE4(bd) \
251 MC_8TAP_LINKS_SSE4(bd)
/*
 * Links the avx2 functions into c->inter.put for table indexes W = 4, 5, 6
 * (suffixes 32, 64, 128): pixels at [0][0], <tap>tap_h at [0][1] and
 * <tap>tap_v at [1][0]. The [1][1] (hv) slots are not written here.
 * NOTE(review): the macro opens "do {" but the numbered lines 263-264,
 * which presumably close it, are absent from this listing.
 */
253 #define MC_TAP_LINKS_AVX2(C,tap,bd) do { \
254 PEL_LINK(c->inter.put, C, 4, 0, 0, pixels32, bd, avx2) \
255 PEL_LINK(c->inter.put, C, 5, 0, 0, pixels64, bd, avx2) \
256 PEL_LINK(c->inter.put, C, 6, 0, 0, pixels128, bd, avx2) \
257 PEL_LINK(c->inter.put, C, 4, 0, 1, tap##tap_h32, bd, avx2) \
258 PEL_LINK(c->inter.put, C, 5, 0, 1, tap##tap_h64, bd, avx2) \
259 PEL_LINK(c->inter.put, C, 6, 0, 1, tap##tap_h128, bd, avx2) \
260 PEL_LINK(c->inter.put, C, 4, 1, 0, tap##tap_v32, bd, avx2) \
261 PEL_LINK(c->inter.put, C, 5, 1, 0, tap##tap_v64, bd, avx2) \
262 PEL_LINK(c->inter.put, C, 6, 1, 0, tap##tap_v128, bd, avx2) \
/*
 * Applies MC_TAP_LINKS_AVX2 to LUMA with 8 taps and to CHROMA with 4 taps.
 * The expansion is two statements and already ends in a semicolon.
 */
265 #define MC_LINKS_AVX2(bd) \
266 MC_TAP_LINKS_AVX2(LUMA, 8, bd); \
267 MC_TAP_LINKS_AVX2(CHROMA, 4, bd);
/*
 * Links the additional avx2 functions into c->inter.put:
 *   W = 3 (suffix 16): pixels16 at [0][0], h16 at [0][1], v16 at [1][0],
 *                      hv16 at [1][1]
 *   W = 4, 5, 6:       hv32, hv64, hv128 at [1][1]
 * NOTE(review): the macro opens "do {" but the numbered lines 277-278,
 * which presumably close it, are absent from this listing.
 */
269 #define MC_TAP_LINKS_16BPC_AVX2(C, tap, bd) do { \
270 PEL_LINK(c->inter.put, C, 3, 0, 0, pixels16, bd, avx2) \
271 PEL_LINK(c->inter.put, C, 3, 0, 1, tap##tap_h16, bd, avx2) \
272 PEL_LINK(c->inter.put, C, 3, 1, 0, tap##tap_v16, bd, avx2) \
273 PEL_LINK(c->inter.put, C, 3, 1, 1, tap##tap_hv16, bd, avx2) \
274 PEL_LINK(c->inter.put, C, 4, 1, 1, tap##tap_hv32, bd, avx2) \
275 PEL_LINK(c->inter.put, C, 5, 1, 1, tap##tap_hv64, bd, avx2) \
276 PEL_LINK(c->inter.put, C, 6, 1, 1, tap##tap_hv128, bd, avx2) \
/*
 * Applies MC_TAP_LINKS_16BPC_AVX2 to LUMA with 8 taps and to CHROMA with
 * 4 taps. The expansion is two statements and already ends in a semicolon.
 */
279 #define MC_LINKS_16BPC_AVX2(bd) \
280 MC_TAP_LINKS_16BPC_AVX2(LUMA, 8, bd); \
281 MC_TAP_LINKS_16BPC_AVX2(CHROMA, 4, bd);
/*
 * Stores the vvc_avg_<bd>_<opt> and vvc_w_avg_<bd>_<opt> wrappers in
 * c->inter.avg and c->inter.w_avg.
 * NOTE(review): the macro opens "do {" but the numbered lines 286-287,
 * which presumably close it, are absent from this listing.
 */
283 #define AVG_INIT(bd, opt) do { \
284 c->inter.avg = bf(vvc_avg, bd, opt); \
285 c->inter.w_avg = bf(vvc_w_avg, bd, opt); \
/*
 * Fills the 2x2 table c->inter.dmvr with the avx2 functions for bit depth
 * bd: [0][0] plain, [0][1] h, [1][0] v, [1][1] hv.
 * NOTE(review): the macro opens "do {" but the numbered lines 293-294,
 * which presumably close it, are absent from this listing.
 */
288 #define DMVR_INIT(bd) do { \
289 c->inter.dmvr[0][0] = ff_vvc_dmvr_##bd##_avx2; \
290 c->inter.dmvr[0][1] = ff_vvc_dmvr_h_##bd##_avx2; \
291 c->inter.dmvr[1][0] = ff_vvc_dmvr_v_##bd##_avx2; \
292 c->inter.dmvr[1][1] = ff_vvc_dmvr_hv_##bd##_avx2; \
/*
 * Stores the avx2 ALF wrappers for bit depth bd in c->alf.filter[LUMA],
 * c->alf.filter[CHROMA] and c->alf.classify.
 * NOTE(review): the macro opens "do {" but the numbered lines 299-300,
 * which presumably close it, are absent from this listing.
 */
295 #define ALF_INIT(bd) do { \
296 c->alf.filter[LUMA] = vvc_alf_filter_luma_##bd##_avx2; \
297 c->alf.filter[CHROMA] = vvc_alf_filter_chroma_##bd##_avx2; \
298 c->alf.classify = vvc_alf_classify_##bd##_avx2; \
/*
 * Prototype of the external AVX2 SAD routine. It takes two int16_t sources,
 * an offset pair (dx, dy) and the block size (block_w, block_h), and
 * returns an int.
 */
301 int ff_vvc_sad_avx2(
const int16_t *
src0,
const int16_t *
src1,
int dx,
int dy,
int block_w,
int block_h);
/* Stores ff_vvc_sad_avx2 in c->inter.sad. */
302 #define SAD_INIT() c->inter.sad = ff_vvc_sad_avx2
306 #endif // ARCH_X86_64
315 #if HAVE_SSE4_EXTERNAL
320 #if HAVE_AVX2_EXTERNAL
332 #if HAVE_SSE4_EXTERNAL
337 #if HAVE_AVX2_EXTERNAL
342 MC_LINKS_16BPC_AVX2(10);
350 #if HAVE_SSE4_EXTERNAL
355 #if HAVE_AVX2_EXTERNAL
360 MC_LINKS_16BPC_AVX2(12);
#define EXTERNAL_AVX2_FAST(flags)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
static atomic_int cpu_flags
void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
#define DMVR_PROTOTYPES(bd, opt)
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can, and has in some cases, led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c…
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
#define DMVR_INIT(bd, opt)
#define EXTERNAL_SSE4(flags)