/*
 * Declare the prototype of one chroma loop-filter routine.
 * DIR, DEPTH and OPT are token-pasted into the symbol name
 * ff_hevc_<DIR>_loop_filter_chroma_<DEPTH>_<OPT>. Only a declaration is
 * emitted; the definition has to be provided elsewhere.
 */
35 #define LFC_FUNC(DIR, DEPTH, OPT) \
36 void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, const int *tc, const uint8_t *no_p, const uint8_t *no_q);
/*
 * Declare the prototype of one luma loop-filter routine,
 * ff_hevc_<DIR>_loop_filter_luma_<DEPTH>_<OPT>. The signature matches the
 * chroma variant except for the additional `beta` argument.
 */
38 #define LFL_FUNC(DIR, DEPTH, OPT) \
39 void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, int beta, const int *tc, const uint8_t *no_p, const uint8_t *no_q);
/*
 * Declare both the `h` and the `v` chroma loop-filter prototypes for one
 * depth/opt pair.
 * NOTE: the `type` parameter is accepted but not used by the expansion.
 */
41 #define LFC_FUNCS(type, depth, opt) \
42 LFC_FUNC(h, depth, opt) \
43 LFC_FUNC(v, depth, opt)
/*
 * Declare both the `h` and the `v` luma loop-filter prototypes for one
 * depth/opt pair.
 * NOTE: the `type` parameter is accepted but not used by the expansion.
 */
45 #define LFL_FUNCS(type, depth, opt) \
46 LFL_FUNC(h, depth, opt) \
47 LFL_FUNC(v, depth, opt)
/*
 * Declare the DC-only inverse-transform prototypes
 * ff_hevc_idct_<W>_dc_{8,10,12}_<opt>, each taking the coefficient buffer.
 * The last declaration deliberately has no trailing semicolon, so the
 * macro invocation must be followed by one.
 */
65 #define IDCT_DC_FUNCS(W, opt) \
66 void ff_hevc_idct_ ## W ## _dc_8_ ## opt(int16_t *coeffs); \
67 void ff_hevc_idct_ ## W ## _dc_10_ ## opt(int16_t *coeffs); \
68 void ff_hevc_idct_ ## W ## _dc_12_ ## opt(int16_t *coeffs)
/*
 * Declare the inverse-transform prototypes for the sizes 4x4, 8x8, 16x16
 * and 32x32 at 8 and 10 bits: ff_hevc_idct_<size>_<bits>_<opt>(coeffs,
 * col_limit).
 * Unlike IDCT_DC_FUNCS, the expansion already ends with a semicolon.
 */
77 #define IDCT_FUNCS(opt) \
78 void ff_hevc_idct_4x4_8_ ## opt(int16_t *coeffs, int col_limit); \
79 void ff_hevc_idct_4x4_10_ ## opt(int16_t *coeffs, int col_limit); \
80 void ff_hevc_idct_8x8_8_ ## opt(int16_t *coeffs, int col_limit); \
81 void ff_hevc_idct_8x8_10_ ## opt(int16_t *coeffs, int col_limit); \
82 void ff_hevc_idct_16x16_8_ ## opt(int16_t *coeffs, int col_limit); \
83 void ff_hevc_idct_16x16_10_ ## opt(int16_t *coeffs, int col_limit); \
84 void ff_hevc_idct_32x32_8_ ## opt(int16_t *coeffs, int col_limit); \
85 void ff_hevc_idct_32x32_10_ ## opt(int16_t *coeffs, int col_limit);
/*
 * Alias: DECL_HV_FILTER(pel) pastes the name ff_hevc_pel_filters, which this
 * define redirects to ff_hevc_qpel_filters so that the expansion names an
 * existing table.
 */
91 #define ff_hevc_pel_filters ff_hevc_qpel_filters
/*
 * Declare two locals, `hf` and `vf`, pointing at the rows of
 * ff_hevc_<f>_filters selected by `mx` and `my` respectively.
 * `mx` and `my` must be in scope at the point of expansion.
 */
92 #define DECL_HV_FILTER(f) \
93 const uint8_t *hf = ff_hevc_ ## f ## _filters[mx]; \
94 const uint8_t *vf = ff_hevc_ ## f ## _filters[my];
/*
 * Define the static forwarding wrapper hevc_put_<a>_<depth>_<opt>, which
 * calls ff_h2656_put_<b>_<depth>_<opt>.
 *
 * The wrapper passes 2 * MAX_PB_SIZE as the second argument (the position
 * where FW_PUT_UNI passes `dststride`) and passes `hf`/`vf` instead of the
 * raw `mx`/`my`.
 *
 * NOTE(review): the declaration of `hf`/`vf` and the braces of the function
 * body are not among the lines shown here; presumably the body starts with
 * DECL_HV_FILTER(p) -- confirm against the full file.
 */
96 #define FW_PUT(p, a, b, depth, opt) \
97 static void hevc_put_ ## a ## _ ## depth ## _##opt(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, \
98 int height, intptr_t mx, intptr_t my,int width) \
101 ff_h2656_put_ ## b ## _ ## depth ## _##opt(dst, 2 * MAX_PB_SIZE, src, srcstride, height, hf, vf, width); \
/*
 * Define the static forwarding wrapper hevc_put_uni_<a>_<depth>_<opt>, which
 * calls ff_h2656_put_uni_<b>_<depth>_<opt> with the caller's dst/dststride
 * and src/srcstride, passing `hf`/`vf` instead of the raw `mx`/`my`.
 *
 * NOTE(review): the declaration of `hf`/`vf` and the braces of the function
 * body are not among the lines shown here; presumably the body starts with
 * DECL_HV_FILTER(p) -- confirm against the full file.
 */
104 #define FW_PUT_UNI(p, a, b, depth, opt) \
105 static void hevc_put_uni_ ## a ## _ ## depth ## _##opt(uint8_t *dst, ptrdiff_t dststride, \
106 const uint8_t *src, ptrdiff_t srcstride, \
107 int height, intptr_t mx, intptr_t my, int width) \
110 ff_h2656_put_uni_ ## b ## _ ## depth ## _##opt(dst, dststride, src, srcstride, height, hf, vf, width); \
113 #if ARCH_X86_64 && HAVE_SSE4_EXTERNAL
/* Emit both the plain (FW_PUT) and the uni (FW_PUT_UNI) wrapper. */
115 #define FW_PUT_FUNCS(p, a, b, depth, opt) \
116 FW_PUT(p, a, b, depth, opt) \
117 FW_PUT_UNI(p, a, b, depth, opt)
/*
 * Full-pel wrappers of width w: hevc_put[_uni]_pel_pixels<w>_... forwarding
 * to the ff_h2656 pixels<w> routines, using the `pel` filter name.
 */
119 #define FW_PEL(w, depth, opt) FW_PUT_FUNCS(pel, pel_pixels##w, pixels##w, depth, opt)
/*
 * Horizontal-only and vertical-only wrappers: <npel>_h<w> / <npel>_v<w>
 * forward to the <n>tap_h<w> / <n>tap_v<w> routines.
 */
121 #define FW_DIR(npel, n, w, depth, opt) \
122 FW_PUT_FUNCS(npel, npel ## _h##w, n ## tap_h##w, depth, opt) \
123 FW_PUT_FUNCS(npel, npel ## _v##w, n ## tap_v##w, depth, opt)
/* Combined horizontal+vertical wrapper: <npel>_hv<w> -> <n>tap_hv<w>. */
125 #define FW_DIR_HV(npel, n, w, depth, opt) \
126 FW_PUT_FUNCS(npel, npel ## _hv##w, n ## tap_hv##w, depth, opt)
/*
 * epel wrappers map onto the 4-tap routines: FW_EPEL emits the h and v
 * variants, FW_EPEL_HV the hv variant, FW_EPEL_FUNCS all three.
 */
140 #define FW_EPEL(w, depth, opt) FW_DIR(epel, 4, w, depth, opt)
141 #define FW_EPEL_HV(w, depth, opt) FW_DIR_HV(epel, 4, w, depth, opt)
142 #define FW_EPEL_FUNCS(w, depth, opt) \
143 FW_EPEL(w, depth, opt) \
144 FW_EPEL_HV(w, depth, opt)
/*
 * SSE4 epel wrapper instantiations: widths 4, 6, 8 and 16 at 8 bits,
 * widths 4, 6 and 8 at 10 and 12 bits.
 */
148 FW_EPEL_FUNCS(4, 8, sse4)
149 FW_EPEL_FUNCS(6, 8, sse4)
150 FW_EPEL_FUNCS(8, 8, sse4)
151 FW_EPEL_FUNCS(16, 8, sse4)
152 FW_EPEL_FUNCS(4, 10, sse4)
153 FW_EPEL_FUNCS(6, 10, sse4)
154 FW_EPEL_FUNCS(8, 10, sse4)
155 FW_EPEL_FUNCS(4, 12, sse4)
156 FW_EPEL_FUNCS(6, 12, sse4)
157 FW_EPEL_FUNCS(8, 12, sse4)
/*
 * qpel wrappers map onto the 8-tap routines: FW_QPEL emits the h and v
 * variants, FW_QPEL_HV the hv variant, FW_QPEL_FUNCS all three.
 */
159 #define FW_QPEL(w, depth, opt) FW_DIR(qpel, 8, w, depth, opt)
160 #define FW_QPEL_HV(w, depth, opt) FW_DIR_HV(qpel, 8, w, depth, opt)
161 #define FW_QPEL_FUNCS(w, depth, opt) \
162 FW_QPEL(w, depth, opt) \
163 FW_QPEL_HV(w, depth, opt)
/* SSE4 qpel wrapper instantiations: widths 4 and 8 at 8, 10 and 12 bits. */
168 FW_QPEL_FUNCS(4, 8, sse4)
169 FW_QPEL_FUNCS(8, 8, sse4)
170 FW_QPEL_FUNCS(4, 10, sse4)
171 FW_QPEL_FUNCS(8, 10, sse4)
172 FW_QPEL_FUNCS(4, 12, sse4)
173 FW_QPEL_FUNCS(8, 12, sse4)
175 #if HAVE_AVX2_EXTERNAL
/*
 * AVX2 wrapper instantiations visible in this section. Note that the
 * full-pel entry uses FW_PUT only, i.e. no uni wrapper is generated for
 * pel_pixels16 at 10 bits here.
 * NOTE(review): further instantiations may sit on lines not shown in this
 * view.
 */
178 FW_PUT(pel, pel_pixels16, pixels16, 10, avx2)
181 FW_EPEL(16, 10, avx2)
183 FW_EPEL_HV(32, 8, avx2)
184 FW_EPEL_HV(16, 10, avx2)
187 FW_QPEL(16, 10, avx2)
189 FW_QPEL_HV(16, 10, avx2)
/*
 * Define static hevc_put_<name><W>_<bitd>_<opt>, which covers a block of
 * width W by calling the <step>-wide variant once per column group
 * (i = 0, step, 2*step, ... while i < W).
 *
 * The source pointer is advanced by i * ((bitd + 7) / 8) bytes, i.e. the
 * column index scaled by bitd rounded up to whole bytes.
 *
 * NOTE(review): the declarations of `i` and `dst`, and the per-iteration
 * assignment of `dst`, are not among the lines shown here -- confirm against
 * the full file.
 */
194 #define mc_rep_func(name, bitd, step, W, opt) \
195 static void hevc_put_##name##W##_##bitd##_##opt(int16_t *_dst, \
196 const uint8_t *_src, ptrdiff_t _srcstride, int height, \
197 intptr_t mx, intptr_t my, int width) \
201 for (i = 0; i < W; i += step) { \
202 const uint8_t *src = _src + (i * ((bitd + 7) / 8)); \
204 hevc_put_##name##step##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width); \
/*
 * Define static hevc_put_uni_<name><W>_<bitd>_<opt>, which covers a block of
 * width W by calling the <step>-wide uni variant once per column group.
 *
 * Both `src` and `dst` are uint8_t pointers and are advanced by the same
 * byte offset, i * ((bitd + 7) / 8).
 *
 * NOTE(review): the declarations of `i` and `dst` are not among the lines
 * shown here.
 */
207 #define mc_rep_uni_func(name, bitd, step, W, opt) \
208 static void hevc_put_uni_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, \
209 const uint8_t *_src, ptrdiff_t _srcstride, int height, \
210 intptr_t mx, intptr_t my, int width) \
214 for (i = 0; i < W; i += step) { \
215 const uint8_t *src = _src + (i * ((bitd + 7) / 8)); \
216 dst = _dst + (i * ((bitd + 7) / 8)); \
217 hevc_put_uni_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, \
218 height, mx, my, width); \
/*
 * Define static ff_hevc_put_bi_<name><W>_<bitd>_<opt>, which covers a block
 * of width W by calling the <step>-wide bi variant once per column group.
 *
 * `src` and `dst` advance by the byte offset i * ((bitd + 7) / 8); `src2` is
 * an int16_t pointer and therefore advances by i elements.
 *
 * NOTE(review): the declarations of `i` and `dst` are not among the lines
 * shown here.
 */
221 #define mc_rep_bi_func(name, bitd, step, W, opt) \
222 static void ff_hevc_put_bi_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const uint8_t *_src, \
223 ptrdiff_t _srcstride, const int16_t *_src2, \
224 int height, intptr_t mx, intptr_t my, int width) \
228 for (i = 0; i < W ; i += step) { \
229 const uint8_t *src = _src + (i * ((bitd + 7) / 8)); \
230 const int16_t *src2 = _src2 + i; \
231 dst = _dst + (i * ((bitd + 7) / 8)); \
232 ff_hevc_put_bi_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, \
233 height, mx, my, width); \
/* Emit the plain, uni and bi loop-based width-W functions in one go. */
237 #define mc_rep_funcs(name, bitd, step, W, opt) \
238 mc_rep_func(name, bitd, step, W, opt) \
239 mc_rep_uni_func(name, bitd, step, W, opt) \
240 mc_rep_bi_func(name, bitd, step, W, opt)
/*
 * Define static hevc_put_<name><W>_<bitd>_<opt> as two calls of different
 * widths: the <step1>-wide variant at the block start, then the <step2>-wide
 * variant starting step1 columns further right.
 *
 * `dst` is int16_t, so it advances by step1 elements; `src` is a byte
 * pointer and advances by step1 * ((bitd + 7) / 8) bytes.
 * W is only used to form the function name.
 */
242 #define mc_rep_func2(name, bitd, step1, step2, W, opt) \
243 static void hevc_put_##name##W##_##bitd##_##opt(int16_t *dst, \
244 const uint8_t *src, ptrdiff_t _srcstride, int height, \
245 intptr_t mx, intptr_t my, int width) \
247 hevc_put_##name##step1##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width); \
248 hevc_put_##name##step2##_##bitd##_##opt(dst + step1, src + (step1 * ((bitd + 7) / 8)), \
249 _srcstride, height, mx, my, width); \
/*
 * Define static hevc_put_uni_<name><W>_<bitd>_<opt> as two calls: the
 * <step1>-wide uni variant at the block start, then the <step2>-wide uni
 * variant with both `dst` and `src` advanced by
 * step1 * ((bitd + 7) / 8) bytes.
 * W is only used to form the function name.
 */
251 #define mc_rep_uni_func2(name, bitd, step1, step2, W, opt) \
252 static void hevc_put_uni_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, \
253 const uint8_t *src, ptrdiff_t _srcstride, int height, \
254 intptr_t mx, intptr_t my, int width) \
256 hevc_put_uni_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, height, mx, my, width); \
257 hevc_put_uni_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride, \
258 src + (step1 * ((bitd + 7) / 8)), _srcstride, \
259 height, mx, my, width); \
/*
 * Define static ff_hevc_put_bi_<name><W>_<bitd>_<opt> as two calls: the
 * <step1>-wide bi variant at the block start, then the <step2>-wide bi
 * variant with `dst`/`src` advanced by step1 * ((bitd + 7) / 8) bytes and
 * the int16_t pointer `src2` advanced by step1 elements.
 * W is only used to form the function name.
 */
261 #define mc_rep_bi_func2(name, bitd, step1, step2, W, opt) \
262 static void ff_hevc_put_bi_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
263 ptrdiff_t _srcstride, const int16_t *src2, \
264 int height, intptr_t mx, intptr_t my, int width) \
266 ff_hevc_put_bi_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, height, mx, my, width);\
267 ff_hevc_put_bi_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride, \
268 src + (step1 * ((bitd + 7) / 8)), _srcstride, \
269 src2 + step1, height, mx, my, width); \
/* Emit the plain, uni and bi two-call width-W functions in one go. */
272 #define mc_rep_funcs2(name, bitd, step1, step2, W, opt) \
273 mc_rep_func2(name, bitd, step1, step2, W, opt) \
274 mc_rep_uni_func2(name, bitd, step1, step2, W, opt) \
275 mc_rep_bi_func2(name, bitd, step1, step2, W, opt)
277 #if ARCH_X86_64 && HAVE_SSE4_EXTERNAL
/*
 * 10-bit, mixed instruction sets: define static
 * hevc_put_<name><width1>_10_<opt1> as the <width2>-wide <opt1> routine
 * followed by the <width3>-wide <opt2> routine.
 *
 * For the second call the int16_t `dst` advances by width2 elements while
 * the byte pointer `src` advances by width4. The visible call sites pass
 * width4 = 2 * width2, which is consistent with width4 being the byte size
 * of width2 samples.
 */
279 #define mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
280 static void hevc_put_##name##width1##_10_##opt1(int16_t *dst, const uint8_t *src, ptrdiff_t _srcstride, \
281 int height, intptr_t mx, intptr_t my, int width) \
284 hevc_put_##name##width2##_10_##opt1(dst, src, _srcstride, height, mx, my, width); \
285 hevc_put_##name##width3##_10_##opt2(dst+ width2, src+ width4, _srcstride, height, mx, my, width); \
/*
 * 10-bit, mixed instruction sets: define static
 * ff_hevc_put_bi_<name><width1>_10_<opt1> as the <width2>-wide <opt1> bi
 * routine followed by the <width3>-wide <opt2> bi routine.
 *
 * For the second call the byte pointers `dst` and `src` advance by width4,
 * and the int16_t pointer `src2` advances by width2 elements.
 */
288 #define mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
289 static void ff_hevc_put_bi_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
290 ptrdiff_t _srcstride, const int16_t *src2, \
291 int height, intptr_t mx, intptr_t my, int width) \
293 ff_hevc_put_bi_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, src2, \
294 height, mx, my, width); \
295 ff_hevc_put_bi_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride, src2+width2, \
296 height, mx, my, width); \
/*
 * 10-bit, mixed instruction sets: define static
 * hevc_put_uni_<name><width1>_10_<opt1> as the <width2>-wide <opt1> uni
 * routine followed by the <width3>-wide <opt2> uni routine, with the byte
 * pointers `dst` and `src` both advanced by width4 for the second call.
 */
299 #define mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
300 static void hevc_put_uni_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, \
301 const uint8_t *src, ptrdiff_t _srcstride, int height, \
302 intptr_t mx, intptr_t my, int width) \
304 hevc_put_uni_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, \
305 height, mx, my, width); \
306 hevc_put_uni_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride, \
307 height, mx, my, width); \
/* Emit the plain, bi and uni 10-bit mixed-instruction-set functions. */
310 #define mc_rep_mixs_10(name, width1, width2, width3, opt1, opt2, width4) \
311 mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
312 mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
313 mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)
/*
 * 8-bit, mixed instruction sets: define static
 * hevc_put_<name><width1>_8_<opt1> as the <width2>-wide <opt1> routine
 * followed by the <width3>-wide <opt2> routine.
 *
 * Both `dst` (int16_t elements) and `src` (bytes) advance by width2 for the
 * second call; no separate byte-offset parameter is needed, unlike the
 * 10-bit variant.
 */
315 #define mc_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
316 static void hevc_put_##name##width1##_8_##opt1(int16_t *dst, const uint8_t *src, ptrdiff_t _srcstride, \
317 int height, intptr_t mx, intptr_t my, int width) \
320 hevc_put_##name##width2##_8_##opt1(dst, src, _srcstride, height, mx, my, width); \
321 hevc_put_##name##width3##_8_##opt2(dst+ width2, src+ width2, _srcstride, height, mx, my, width); \
/*
 * 8-bit, mixed instruction sets: define static
 * ff_hevc_put_bi_<name><width1>_8_<opt1> as the <width2>-wide <opt1> bi
 * routine followed by the <width3>-wide <opt2> bi routine, with `dst`,
 * `src` and `src2` all advanced by width2 for the second call.
 */
324 #define mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
325 static void ff_hevc_put_bi_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
326 ptrdiff_t _srcstride, const int16_t *src2, \
327 int height, intptr_t mx, intptr_t my, int width) \
329 ff_hevc_put_bi_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \
330 src2, height, mx, my, width); \
331 ff_hevc_put_bi_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride, \
332 src2+width2, height, mx, my, width); \
/*
 * 8-bit, mixed instruction sets: define static
 * hevc_put_uni_<name><width1>_8_<opt1> as the <width2>-wide <opt1> uni
 * routine followed by the <width3>-wide <opt2> uni routine, with `dst` and
 * `src` both advanced by width2 for the second call.
 */
335 #define mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
336 static void hevc_put_uni_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, \
337 const uint8_t *src, ptrdiff_t _srcstride, int height, \
338 intptr_t mx, intptr_t my, int width) \
340 hevc_put_uni_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \
341 height, mx, my, width); \
342 hevc_put_uni_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride, \
343 height, mx, my, width); \
/* Emit the plain, bi and uni 8-bit mixed-instruction-set functions. */
346 #define mc_rep_mixs_8(name, width1, width2, width3, opt1, opt2) \
347 mc_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
348 mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
349 mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2)
351 #if HAVE_AVX2_EXTERNAL
/*
 * Mixed AVX2 + SSE4 instantiations.
 *
 * 8 bits: width 48 is built from a 32-wide AVX2 call plus a 16-wide SSE4
 * call.
 */
353 mc_rep_mixs_8(pel_pixels, 48, 32, 16, avx2, sse4)
354 mc_rep_mixs_8(epel_hv, 48, 32, 16, avx2, sse4)
355 mc_rep_mixs_8(epel_h , 48, 32, 16, avx2, sse4)
356 mc_rep_mixs_8(epel_v , 48, 32, 16, avx2, sse4)
/*
 * 10 bits: width 24 is built from a 16-wide AVX2 call plus an 8-wide SSE4
 * call, with 32 passed as the byte offset of the second part. For
 * pel_pixels only the plain and bi variants are generated here (no uni).
 */
358 mc_rep_mix_10(pel_pixels, 24, 16, 8, avx2, sse4, 32)
359 mc_bi_rep_mix_10(pel_pixels,24, 16, 8, avx2, sse4, 32)
360 mc_rep_mixs_10(epel_hv, 24, 16, 8, avx2, sse4, 32)
361 mc_rep_mixs_10(epel_h , 24, 16, 8, avx2, sse4, 32)
362 mc_rep_mixs_10(epel_v , 24, 16, 8, avx2, sse4, 32)
365 mc_rep_mixs_10(qpel_h , 24, 16, 8, avx2, sse4, 32)
366 mc_rep_mixs_10(qpel_v , 24, 16, 8, avx2, sse4, 32)
367 mc_rep_mixs_10(qpel_hv, 24, 16, 8, avx2, sse4, 32)
/* 8-bit qpel, width 48 (32-wide AVX2 + 16-wide SSE4). */
403 mc_rep_mixs_8(qpel_h , 48, 32, 16, avx2, sse4)
406 mc_rep_mixs_8(qpel_v, 48, 32, 16, avx2, sse4)
/*
 * Define the (non-static) function ff_hevc_put_uni_w<W>_<bitd>_<opt>, which
 * covers width W by calling the <step>-wide ff_hevc_put_uni_w<step> routine
 * once per column group.
 *
 * `src` is int16_t and advances by i elements; `dst` is a byte pointer and
 * advances by i * ((bitd + 7) / 8) bytes. height, denom, _wx and _ox are
 * forwarded unchanged.
 *
 * NOTE(review): the declarations of `i` and `dst` are not among the lines
 * shown here.
 */
540 #define mc_rep_uni_w(bitd, step, W, opt) \
541 void ff_hevc_put_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const int16_t *_src, \
542 int height, int denom, int _wx, int _ox) \
546 for (i = 0; i < W; i += step) { \
547 const int16_t *src = _src + i; \
548 dst= _dst + (i * ((bitd + 7) / 8)); \
549 ff_hevc_put_uni_w##step##_##bitd##_##opt(dst, dststride, src, \
550 height, denom, _wx, _ox); \
/*
 * SSE4 instantiations for 8, 10 and 12 bits: width 12 is built from 6-wide
 * steps, widths 16, 24, 32, 48 and 64 from 8-wide steps.
 */
554 mc_rep_uni_w(8, 6, 12, sse4)
555 mc_rep_uni_w(8, 8, 16, sse4)
556 mc_rep_uni_w(8, 8, 24, sse4)
557 mc_rep_uni_w(8, 8, 32, sse4)
558 mc_rep_uni_w(8, 8, 48, sse4)
559 mc_rep_uni_w(8, 8, 64, sse4)
561 mc_rep_uni_w(10, 6, 12, sse4)
562 mc_rep_uni_w(10, 8, 16, sse4)
563 mc_rep_uni_w(10, 8, 24, sse4)
564 mc_rep_uni_w(10, 8, 32, sse4)
565 mc_rep_uni_w(10, 8, 48, sse4)
566 mc_rep_uni_w(10, 8, 64, sse4)
568 mc_rep_uni_w(12, 6, 12, sse4)
569 mc_rep_uni_w(12, 8, 16, sse4)
570 mc_rep_uni_w(12, 8, 24, sse4)
571 mc_rep_uni_w(12, 8, 32, sse4)
572 mc_rep_uni_w(12, 8, 48, sse4)
573 mc_rep_uni_w(12, 8, 64, sse4)
/*
 * Define the (non-static) function ff_hevc_put_bi_w<W>_<bitd>_<opt>, which
 * covers width W by calling the <step>-wide ff_hevc_put_bi_w<step> routine
 * once per column group.
 *
 * `src` and `src2` are int16_t and advance by i elements; `dst` is a byte
 * pointer and advances by i * ((bitd + 7) / 8) bytes. The weights and
 * offsets (_wx0, _wx1, _ox0, _ox1), denom and height are forwarded
 * unchanged.
 *
 * NOTE(review): the declarations of `i` and `dst` are not among the lines
 * shown here.
 */
575 #define mc_rep_bi_w(bitd, step, W, opt) \
576 void ff_hevc_put_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const int16_t *_src, \
577 const int16_t *_src2, int height, \
578 int denom, int _wx0, int _wx1, int _ox0, int _ox1) \
582 for (i = 0; i < W; i += step) { \
583 const int16_t *src = _src + i; \
584 const int16_t *src2 = _src2 + i; \
585 dst = _dst + (i * ((bitd + 7) / 8)); \
586 ff_hevc_put_bi_w##step##_##bitd##_##opt(dst, dststride, src, src2, \
587 height, denom, _wx0, _wx1, _ox0, _ox1); \
/*
 * SSE4 instantiations for 8, 10 and 12 bits: width 12 is built from 6-wide
 * steps, widths 16, 24, 32, 48 and 64 from 8-wide steps.
 */
591 mc_rep_bi_w(8, 6, 12, sse4)
592 mc_rep_bi_w(8, 8, 16, sse4)
593 mc_rep_bi_w(8, 8, 24, sse4)
594 mc_rep_bi_w(8, 8, 32, sse4)
595 mc_rep_bi_w(8, 8, 48, sse4)
596 mc_rep_bi_w(8, 8, 64, sse4)
598 mc_rep_bi_w(10, 6, 12, sse4)
599 mc_rep_bi_w(10, 8, 16, sse4)
600 mc_rep_bi_w(10, 8, 24, sse4)
601 mc_rep_bi_w(10, 8, 32, sse4)
602 mc_rep_bi_w(10, 8, 48, sse4)
603 mc_rep_bi_w(10, 8, 64, sse4)
605 mc_rep_bi_w(12, 6, 12, sse4)
606 mc_rep_bi_w(12, 8, 16, sse4)
607 mc_rep_bi_w(12, 8, 24, sse4)
608 mc_rep_bi_w(12, 8, 32, sse4)
609 mc_rep_bi_w(12, 8, 48, sse4)
610 mc_rep_bi_w(12, 8, 64, sse4)
/*
 * Define static hevc_put_uni_w_<name><W>_<bitd>_<opt> as a two-stage
 * operation:
 *   1. hevc_put_<name><W>_<bitd>_<opt> writes its int16_t output into the
 *      local buffer `temp` (71 * MAX_PB_SIZE elements, declared through
 *      LOCAL_ALIGNED_16);
 *   2. ff_hevc_put_uni_w<W>_<bitd>_<opt> reads `temp` and writes to _dst
 *      using denom, _wx and _ox.
 *
 * NOTE(review): `_wx` and `_ox` are used in the second call, but the
 * parameter line declaring them and the braces of the body are not among the
 * lines shown here -- confirm against the full file.
 */
612 #define mc_uni_w_func(name, bitd, W, opt) \
613 static void hevc_put_uni_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \
614 const uint8_t *_src, ptrdiff_t _srcstride, \
615 int height, int denom, \
617 intptr_t mx, intptr_t my, int width) \
619 LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \
620 hevc_put_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \
621 ff_hevc_put_uni_w##W##_##bitd##_##opt(_dst, _dststride, temp, height, denom, _wx, _ox); \
/*
 * Emit mc_uni_w_func for the widths 4, 8, 12, 16, 24, 32, 48 and 64.
 * Width 6 is not part of this list and is instantiated separately below
 * where needed.
 */
624 #define mc_uni_w_funcs(name, bitd, opt) \
625 mc_uni_w_func(name, bitd, 4, opt) \
626 mc_uni_w_func(name, bitd, 8, opt) \
627 mc_uni_w_func(name, bitd, 12, opt) \
628 mc_uni_w_func(name, bitd, 16, opt) \
629 mc_uni_w_func(name, bitd, 24, opt) \
630 mc_uni_w_func(name, bitd, 32, opt) \
631 mc_uni_w_func(name, bitd, 48, opt) \
632 mc_uni_w_func(name, bitd, 64, opt)
/*
 * SSE4 instantiations for 8, 10 and 12 bits. pel_pixels and the epel
 * variants additionally get a width-6 function; the qpel variants do not.
 */
634 mc_uni_w_funcs(pel_pixels, 8, sse4)
635 mc_uni_w_func(pel_pixels, 8, 6, sse4)
636 mc_uni_w_funcs(epel_h, 8, sse4)
637 mc_uni_w_func(epel_h, 8, 6, sse4)
638 mc_uni_w_funcs(epel_v, 8, sse4)
639 mc_uni_w_func(epel_v, 8, 6, sse4)
640 mc_uni_w_funcs(epel_hv, 8, sse4)
641 mc_uni_w_func(epel_hv, 8, 6, sse4)
642 mc_uni_w_funcs(qpel_h, 8, sse4)
643 mc_uni_w_funcs(qpel_v, 8, sse4)
644 mc_uni_w_funcs(qpel_hv, 8, sse4)
646 mc_uni_w_funcs(pel_pixels, 10, sse4)
647 mc_uni_w_func(pel_pixels, 10, 6, sse4)
648 mc_uni_w_funcs(epel_h, 10, sse4)
649 mc_uni_w_func(epel_h, 10, 6, sse4)
650 mc_uni_w_funcs(epel_v, 10, sse4)
651 mc_uni_w_func(epel_v, 10, 6, sse4)
652 mc_uni_w_funcs(epel_hv, 10, sse4)
653 mc_uni_w_func(epel_hv, 10, 6, sse4)
654 mc_uni_w_funcs(qpel_h, 10, sse4)
655 mc_uni_w_funcs(qpel_v, 10, sse4)
656 mc_uni_w_funcs(qpel_hv, 10, sse4)
658 mc_uni_w_funcs(pel_pixels, 12, sse4)
659 mc_uni_w_func(pel_pixels, 12, 6, sse4)
660 mc_uni_w_funcs(epel_h, 12, sse4)
661 mc_uni_w_func(epel_h, 12, 6, sse4)
662 mc_uni_w_funcs(epel_v, 12, sse4)
663 mc_uni_w_func(epel_v, 12, 6, sse4)
664 mc_uni_w_funcs(epel_hv, 12, sse4)
665 mc_uni_w_func(epel_hv, 12, 6, sse4)
666 mc_uni_w_funcs(qpel_h, 12, sse4)
667 mc_uni_w_funcs(qpel_v, 12, sse4)
668 mc_uni_w_funcs(qpel_hv, 12, sse4)
/*
 * Define static hevc_put_bi_w_<name><W>_<bitd>_<opt> as a two-stage
 * operation:
 *   1. hevc_put_<name><W>_<bitd>_<opt> writes its int16_t output into the
 *      local buffer `temp` (71 * MAX_PB_SIZE elements, declared through
 *      LOCAL_ALIGNED_16);
 *   2. ff_hevc_put_bi_w<W>_<bitd>_<opt> combines `temp` with `_src2` and
 *      writes to _dst using denom, _wx0/_wx1 and _ox0/_ox1.
 *
 * NOTE(review): the braces of the function body are not among the lines
 * shown here.
 */
670 #define mc_bi_w_func(name, bitd, W, opt) \
671 static void hevc_put_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \
672 const uint8_t *_src, ptrdiff_t _srcstride, \
673 const int16_t *_src2, \
674 int height, int denom, \
675 int _wx0, int _wx1, int _ox0, int _ox1, \
676 intptr_t mx, intptr_t my, int width) \
678 LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \
679 hevc_put_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \
680 ff_hevc_put_bi_w##W##_##bitd##_##opt(_dst, _dststride, temp, _src2, \
681 height, denom, _wx0, _wx1, _ox0, _ox1); \
/*
 * Emit mc_bi_w_func for the widths 4, 8, 12, 16, 24, 32, 48 and 64.
 * Width 6 is not part of this list and is instantiated separately below
 * where needed.
 */
684 #define mc_bi_w_funcs(name, bitd, opt) \
685 mc_bi_w_func(name, bitd, 4, opt) \
686 mc_bi_w_func(name, bitd, 8, opt) \
687 mc_bi_w_func(name, bitd, 12, opt) \
688 mc_bi_w_func(name, bitd, 16, opt) \
689 mc_bi_w_func(name, bitd, 24, opt) \
690 mc_bi_w_func(name, bitd, 32, opt) \
691 mc_bi_w_func(name, bitd, 48, opt) \
692 mc_bi_w_func(name, bitd, 64, opt)
/*
 * SSE4 instantiations for 8, 10 and 12 bits. pel_pixels and the epel
 * variants additionally get a width-6 function; the qpel variants do not.
 */
694 mc_bi_w_funcs(pel_pixels, 8, sse4)
695 mc_bi_w_func(pel_pixels, 8, 6, sse4)
696 mc_bi_w_funcs(epel_h, 8, sse4)
697 mc_bi_w_func(epel_h, 8, 6, sse4)
698 mc_bi_w_funcs(epel_v, 8, sse4)
699 mc_bi_w_func(epel_v, 8, 6, sse4)
700 mc_bi_w_funcs(epel_hv, 8, sse4)
701 mc_bi_w_func(epel_hv, 8, 6, sse4)
702 mc_bi_w_funcs(qpel_h, 8, sse4)
703 mc_bi_w_funcs(qpel_v, 8, sse4)
704 mc_bi_w_funcs(qpel_hv, 8, sse4)
706 mc_bi_w_funcs(pel_pixels, 10, sse4)
707 mc_bi_w_func(pel_pixels, 10, 6, sse4)
708 mc_bi_w_funcs(epel_h, 10, sse4)
709 mc_bi_w_func(epel_h, 10, 6, sse4)
710 mc_bi_w_funcs(epel_v, 10, sse4)
711 mc_bi_w_func(epel_v, 10, 6, sse4)
712 mc_bi_w_funcs(epel_hv, 10, sse4)
713 mc_bi_w_func(epel_hv, 10, 6, sse4)
714 mc_bi_w_funcs(qpel_h, 10, sse4)
715 mc_bi_w_funcs(qpel_v, 10, sse4)
716 mc_bi_w_funcs(qpel_hv, 10, sse4)
718 mc_bi_w_funcs(pel_pixels, 12, sse4)
719 mc_bi_w_func(pel_pixels, 12, 6, sse4)
720 mc_bi_w_funcs(epel_h, 12, sse4)
721 mc_bi_w_func(epel_h, 12, 6, sse4)
722 mc_bi_w_funcs(epel_v, 12, sse4)
723 mc_bi_w_func(epel_v, 12, 6, sse4)
724 mc_bi_w_funcs(epel_hv, 12, sse4)
725 mc_bi_w_func(epel_hv, 12, 6, sse4)
726 mc_bi_w_funcs(qpel_h, 12, sse4)
727 mc_bi_w_funcs(qpel_v, 12, sse4)
728 mc_bi_w_funcs(qpel_hv, 12, sse4)
729 #endif //ARCH_X86_64 && HAVE_SSE4_EXTERNAL
/*
 * Declare the SAO band-filter prototypes
 * ff_hevc_sao_band_filter_{8,16,32,48,64}_<bitd>_<opt>. All five share one
 * signature: destination and source pointers with separate strides, the
 * offset table, the left band class, and the block width and height.
 */
731 #define SAO_BAND_FILTER_FUNCS(bitd, opt) \
732 void ff_hevc_sao_band_filter_8_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
733 const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
734 void ff_hevc_sao_band_filter_16_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
735 const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
736 void ff_hevc_sao_band_filter_32_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
737 const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
738 void ff_hevc_sao_band_filter_48_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
739 const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
740 void ff_hevc_sao_band_filter_64_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
741 const int16_t *sao_offset_val, int sao_left_class, int width, int height);
/*
 * Fill c->sao_band_filter[0..4] with the 8-, 16-, 32-, 48- and 64-wide
 * band-filter routines for the given bit depth and instruction set.
 * A variable `c` must be in scope at the point of expansion.
 *
 * NOTE(review): the line closing the `do {` block is not among the lines
 * shown here; presumably `} while (0)` -- confirm against the full file.
 */
753 #define SAO_BAND_INIT(bitd, opt) do { \
754 c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_##bitd##_##opt; \
755 c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_##bitd##_##opt; \
756 c->sao_band_filter[2] = ff_hevc_sao_band_filter_32_##bitd##_##opt; \
757 c->sao_band_filter[3] = ff_hevc_sao_band_filter_48_##bitd##_##opt; \
758 c->sao_band_filter[4] = ff_hevc_sao_band_filter_64_##bitd##_##opt; \
/*
 * Declare the SAO edge-filter prototypes
 * ff_hevc_sao_edge_filter_{8,16,32,48,64}_<bitd>_<opt>. All five share one
 * signature; unlike the band filter there is only a destination stride, and
 * `eo` replaces the left band class.
 *
 * NOTE(review): the last visible line ends with a continuation backslash;
 * the line it continues into is not shown here.
 */
761 #define SAO_EDGE_FILTER_FUNCS(bitd, opt) \
762 void ff_hevc_sao_edge_filter_8_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
763 const int16_t *sao_offset_val, int eo, int width, int height); \
764 void ff_hevc_sao_edge_filter_16_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
765 const int16_t *sao_offset_val, int eo, int width, int height); \
766 void ff_hevc_sao_edge_filter_32_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
767 const int16_t *sao_offset_val, int eo, int width, int height); \
768 void ff_hevc_sao_edge_filter_48_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
769 const int16_t *sao_offset_val, int eo, int width, int height); \
770 void ff_hevc_sao_edge_filter_64_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
771 const int16_t *sao_offset_val, int eo, int width, int height); \
/*
 * Fill c->sao_edge_filter[0..4] with the 8-, 16-, 32-, 48- and 64-wide
 * edge-filter routines for the given bit depth and instruction set.
 * A variable `c` must be in scope at the point of expansion.
 *
 * NOTE(review): the line closing the `do {` block is not among the lines
 * shown here; presumably `} while (0)` -- confirm against the full file.
 */
780 #define SAO_EDGE_INIT(bitd, opt) do { \
781 c->sao_edge_filter[0] = ff_hevc_sao_edge_filter_8_##bitd##_##opt; \
782 c->sao_edge_filter[1] = ff_hevc_sao_edge_filter_16_##bitd##_##opt; \
783 c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_##bitd##_##opt; \
784 c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_##bitd##_##opt; \
785 c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_##bitd##_##opt; \
/*
 * Install the five variants of one prediction routine into the slot
 * [idx1][idx2][idx3] of five parallel tables: <dst>, <dst>_bi, <dst>_uni,
 * <dst>_uni_w and <dst>_bi_w.
 *
 * The bi entry uses the ff_hevc_put_bi_ prefix; the other four use the
 * hevc_put_ prefixes of the static wrappers defined in this file.
 * The last assignment has no trailing semicolon, so the invocation must
 * supply it.
 */
788 #define PEL_LINK(dst, idx1, idx2, idx3, name, D, opt) \
789 dst [idx1][idx2][idx3] = hevc_put_ ## name ## _ ## D ## _##opt; \
790 dst ## _bi [idx1][idx2][idx3] = ff_hevc_put_bi_ ## name ## _ ## D ## _##opt; \
791 dst ## _uni [idx1][idx2][idx3] = hevc_put_uni_ ## name ## _ ## D ## _##opt; \
792 dst ## _uni_w[idx1][idx2][idx3] = hevc_put_uni_w_ ## name ## _ ## D ## _##opt; \
793 dst ## _bi_w [idx1][idx2][idx3] = hevc_put_bi_w_ ## name ## _ ## D ## _##opt
/*
 * Install one routine family for every block width handled here. The first
 * table index 1..9 corresponds to the widths 4, 6, 8, 12, 16, 24, 32, 48
 * and 64; index 0 is not touched. `my` and `mx` become the second and third
 * table index.
 */
795 #define EPEL_LINKS(pointer, my, mx, fname, bitd, opt ) \
796 PEL_LINK(pointer, 1, my , mx , fname##4 , bitd, opt ); \
797 PEL_LINK(pointer, 2, my , mx , fname##6 , bitd, opt ); \
798 PEL_LINK(pointer, 3, my , mx , fname##8 , bitd, opt ); \
799 PEL_LINK(pointer, 4, my , mx , fname##12, bitd, opt ); \
800 PEL_LINK(pointer, 5, my , mx , fname##16, bitd, opt ); \
801 PEL_LINK(pointer, 6, my , mx , fname##24, bitd, opt ); \
802 PEL_LINK(pointer, 7, my , mx , fname##32, bitd, opt ); \
803 PEL_LINK(pointer, 8, my , mx , fname##48, bitd, opt ); \
804 PEL_LINK(pointer, 9, my , mx , fname##64, bitd, opt )
/*
 * Same as EPEL_LINKS, except that table index 2 (width 6) is skipped: only
 * the widths 4, 8, 12, 16, 24, 32, 48 and 64 are installed, at the indices
 * 1 and 3..9.
 */
805 #define QPEL_LINKS(pointer, my, mx, fname, bitd, opt) \
806 PEL_LINK(pointer, 1, my , mx , fname##4 , bitd, opt ); \
807 PEL_LINK(pointer, 3, my , mx , fname##8 , bitd, opt ); \
808 PEL_LINK(pointer, 4, my , mx , fname##12, bitd, opt ); \
809 PEL_LINK(pointer, 5, my , mx , fname##16, bitd, opt ); \
810 PEL_LINK(pointer, 6, my , mx , fname##24, bitd, opt ); \
811 PEL_LINK(pointer, 7, my , mx , fname##32, bitd, opt ); \
812 PEL_LINK(pointer, 8, my , mx , fname##48, bitd, opt ); \
813 PEL_LINK(pointer, 9, my , mx , fname##64, bitd, opt )
824 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
825 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
827 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2;
828 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2;
830 c->idct[2] = ff_hevc_idct_16x16_8_sse2;
831 c->idct[3] = ff_hevc_idct_32x32_8_sse2;
835 c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_sse2;
836 c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_sse2;
837 c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_sse2;
838 c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_sse2;
840 c->idct[0] = ff_hevc_idct_4x4_8_sse2;
841 c->idct[1] = ff_hevc_idct_8x8_8_sse2;
849 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
850 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
855 #if HAVE_SSE4_EXTERNAL && ARCH_X86_64
858 EPEL_LINKS(
c->put_hevc_epel, 0, 0, pel_pixels, 8, sse4);
859 EPEL_LINKS(
c->put_hevc_epel, 0, 1, epel_h, 8, sse4);
860 EPEL_LINKS(
c->put_hevc_epel, 1, 0, epel_v, 8, sse4);
861 EPEL_LINKS(
c->put_hevc_epel, 1, 1, epel_hv, 8, sse4);
863 QPEL_LINKS(
c->put_hevc_qpel, 0, 0, pel_pixels, 8, sse4);
864 QPEL_LINKS(
c->put_hevc_qpel, 0, 1, qpel_h, 8, sse4);
865 QPEL_LINKS(
c->put_hevc_qpel, 1, 0, qpel_v, 8, sse4);
866 QPEL_LINKS(
c->put_hevc_qpel, 1, 1, qpel_hv, 8, sse4);
870 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_avx;
871 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_avx;
873 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx;
874 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx;
876 c->idct[2] = ff_hevc_idct_16x16_8_avx;
877 c->idct[3] = ff_hevc_idct_32x32_8_avx;
881 c->idct[0] = ff_hevc_idct_4x4_8_avx;
882 c->idct[1] = ff_hevc_idct_8x8_8_avx;
885 c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_8_avx2;
886 c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_8_avx2;
888 #if HAVE_AVX2_EXTERNAL
890 c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_avx2;
891 c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_avx2;
894 c->put_hevc_epel[7][0][0] = hevc_put_pel_pixels32_8_avx2;
895 c->put_hevc_epel[8][0][0] = hevc_put_pel_pixels48_8_avx2;
896 c->put_hevc_epel[9][0][0] = hevc_put_pel_pixels64_8_avx2;
898 c->put_hevc_qpel[7][0][0] = hevc_put_pel_pixels32_8_avx2;
899 c->put_hevc_qpel[8][0][0] = hevc_put_pel_pixels48_8_avx2;
900 c->put_hevc_qpel[9][0][0] = hevc_put_pel_pixels64_8_avx2;
902 c->put_hevc_epel_uni[7][0][0] = hevc_put_uni_pel_pixels32_8_avx2;
903 c->put_hevc_epel_uni[8][0][0] = hevc_put_uni_pel_pixels48_8_avx2;
904 c->put_hevc_epel_uni[9][0][0] = hevc_put_uni_pel_pixels64_8_avx2;
906 c->put_hevc_qpel_uni[7][0][0] = hevc_put_uni_pel_pixels32_8_avx2;
907 c->put_hevc_qpel_uni[8][0][0] = hevc_put_uni_pel_pixels48_8_avx2;
908 c->put_hevc_qpel_uni[9][0][0] = hevc_put_uni_pel_pixels64_8_avx2;
910 c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_bi_pel_pixels32_8_avx2;
911 c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_bi_pel_pixels48_8_avx2;
912 c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_bi_pel_pixels64_8_avx2;
914 c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_bi_pel_pixels32_8_avx2;
915 c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_bi_pel_pixels48_8_avx2;
916 c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_bi_pel_pixels64_8_avx2;
918 c->put_hevc_epel[7][0][1] = hevc_put_epel_h32_8_avx2;
919 c->put_hevc_epel[8][0][1] = hevc_put_epel_h48_8_avx2;
920 c->put_hevc_epel[9][0][1] = hevc_put_epel_h64_8_avx2;
922 c->put_hevc_epel_uni[7][0][1] = hevc_put_uni_epel_h32_8_avx2;
923 c->put_hevc_epel_uni[8][0][1] = hevc_put_uni_epel_h48_8_avx2;
924 c->put_hevc_epel_uni[9][0][1] = hevc_put_uni_epel_h64_8_avx2;
926 c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_bi_epel_h32_8_avx2;
927 c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_bi_epel_h48_8_avx2;
928 c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_bi_epel_h64_8_avx2;
930 c->put_hevc_epel[7][1][0] = hevc_put_epel_v32_8_avx2;
931 c->put_hevc_epel[8][1][0] = hevc_put_epel_v48_8_avx2;
932 c->put_hevc_epel[9][1][0] = hevc_put_epel_v64_8_avx2;
934 c->put_hevc_epel_uni[7][1][0] = hevc_put_uni_epel_v32_8_avx2;
935 c->put_hevc_epel_uni[8][1][0] = hevc_put_uni_epel_v48_8_avx2;
936 c->put_hevc_epel_uni[9][1][0] = hevc_put_uni_epel_v64_8_avx2;
938 c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_bi_epel_v32_8_avx2;
939 c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_bi_epel_v48_8_avx2;
940 c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_bi_epel_v64_8_avx2;
942 c->put_hevc_epel[7][1][1] = hevc_put_epel_hv32_8_avx2;
943 c->put_hevc_epel[8][1][1] = hevc_put_epel_hv48_8_avx2;
944 c->put_hevc_epel[9][1][1] = hevc_put_epel_hv64_8_avx2;
946 c->put_hevc_epel_uni[7][1][1] = hevc_put_uni_epel_hv32_8_avx2;
947 c->put_hevc_epel_uni[8][1][1] = hevc_put_uni_epel_hv48_8_avx2;
948 c->put_hevc_epel_uni[9][1][1] = hevc_put_uni_epel_hv64_8_avx2;
950 c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_bi_epel_hv32_8_avx2;
951 c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_bi_epel_hv48_8_avx2;
952 c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_bi_epel_hv64_8_avx2;
954 c->put_hevc_qpel[7][0][1] = hevc_put_qpel_h32_8_avx2;
955 c->put_hevc_qpel[8][0][1] = hevc_put_qpel_h48_8_avx2;
956 c->put_hevc_qpel[9][0][1] = hevc_put_qpel_h64_8_avx2;
958 c->put_hevc_qpel[7][1][0] = hevc_put_qpel_v32_8_avx2;
959 c->put_hevc_qpel[8][1][0] = hevc_put_qpel_v48_8_avx2;
960 c->put_hevc_qpel[9][1][0] = hevc_put_qpel_v64_8_avx2;
962 c->put_hevc_qpel_uni[7][0][1] = hevc_put_uni_qpel_h32_8_avx2;
963 c->put_hevc_qpel_uni[8][0][1] = hevc_put_uni_qpel_h48_8_avx2;
964 c->put_hevc_qpel_uni[9][0][1] = hevc_put_uni_qpel_h64_8_avx2;
966 c->put_hevc_qpel_uni[7][1][0] = hevc_put_uni_qpel_v32_8_avx2;
967 c->put_hevc_qpel_uni[8][1][0] = hevc_put_uni_qpel_v48_8_avx2;
968 c->put_hevc_qpel_uni[9][1][0] = hevc_put_uni_qpel_v64_8_avx2;
970 c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_bi_qpel_h32_8_avx2;
971 c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_bi_qpel_h48_8_avx2;
972 c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_bi_qpel_h64_8_avx2;
974 c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_bi_qpel_v32_8_avx2;
975 c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_bi_qpel_v48_8_avx2;
976 c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_bi_qpel_v64_8_avx2;
981 c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_8_avx2;
982 c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_8_avx2;
983 c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_8_avx2;
1003 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
1004 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;
1006 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2;
1007 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2;
1009 c->idct[2] = ff_hevc_idct_16x16_10_sse2;
1010 c->idct[3] = ff_hevc_idct_32x32_10_sse2;
1015 c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_sse2;
1016 c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_sse2;
1017 c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_sse2;
1018 c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_sse2;
1020 c->idct[0] = ff_hevc_idct_4x4_10_sse2;
1021 c->idct[1] = ff_hevc_idct_8x8_10_sse2;
1029 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
1030 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
1033 #if HAVE_SSE4_EXTERNAL && ARCH_X86_64
1035 EPEL_LINKS(
c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4);
1036 EPEL_LINKS(
c->put_hevc_epel, 0, 1, epel_h, 10, sse4);
1037 EPEL_LINKS(
c->put_hevc_epel, 1, 0, epel_v, 10, sse4);
1038 EPEL_LINKS(
c->put_hevc_epel, 1, 1, epel_hv, 10, sse4);
1040 QPEL_LINKS(
c->put_hevc_qpel, 0, 0, pel_pixels, 10, sse4);
1041 QPEL_LINKS(
c->put_hevc_qpel, 0, 1, qpel_h, 10, sse4);
1042 QPEL_LINKS(
c->put_hevc_qpel, 1, 0, qpel_v, 10, sse4);
1043 QPEL_LINKS(
c->put_hevc_qpel, 1, 1, qpel_hv, 10, sse4);
1047 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_avx;
1048 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_avx;
1050 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx;
1051 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx;
1053 c->idct[2] = ff_hevc_idct_16x16_10_avx;
1054 c->idct[3] = ff_hevc_idct_32x32_10_avx;
1057 c->idct[0] = ff_hevc_idct_4x4_10_avx;
1058 c->idct[1] = ff_hevc_idct_8x8_10_avx;
1063 c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_10_avx2;
1065 #if HAVE_AVX2_EXTERNAL
1067 c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_avx2;
1068 c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_avx2;
/*
 * 10-bit AVX2 "pel_pixels" entries, stored at [n][0][0].
 * The first index 5, 6, 7, 8, 9 pairs with the width in the function name:
 * 16, 24, 32, 48, 64.  The same functions are installed in both the epel and
 * the qpel table.
 */
1071 c->put_hevc_epel[5][0][0] = hevc_put_pel_pixels16_10_avx2;
1072 c->put_hevc_epel[6][0][0] = hevc_put_pel_pixels24_10_avx2;
1073 c->put_hevc_epel[7][0][0] = hevc_put_pel_pixels32_10_avx2;
1074 c->put_hevc_epel[8][0][0] = hevc_put_pel_pixels48_10_avx2;
1075 c->put_hevc_epel[9][0][0] = hevc_put_pel_pixels64_10_avx2;
1077 c->put_hevc_qpel[5][0][0] = hevc_put_pel_pixels16_10_avx2;
1078 c->put_hevc_qpel[6][0][0] = hevc_put_pel_pixels24_10_avx2;
1079 c->put_hevc_qpel[7][0][0] = hevc_put_pel_pixels32_10_avx2;
1080 c->put_hevc_qpel[8][0][0] = hevc_put_pel_pixels48_10_avx2;
1081 c->put_hevc_qpel[9][0][0] = hevc_put_pel_pixels64_10_avx2;
/*
 * The uni [n][0][0] slots take *_8_avx2 functions whose width suffix is twice
 * that used for the other tables (32..128 instead of 16..64).
 * NOTE(review): presumably a plain copy of N 10-bit samples is the same as
 * copying 2*N bytes, so the 8-bit copy routine is reused -- confirm before
 * "fixing" these names.
 */
1083 c->put_hevc_epel_uni[5][0][0] = hevc_put_uni_pel_pixels32_8_avx2;
1084 c->put_hevc_epel_uni[6][0][0] = hevc_put_uni_pel_pixels48_8_avx2;
1085 c->put_hevc_epel_uni[7][0][0] = hevc_put_uni_pel_pixels64_8_avx2;
1086 c->put_hevc_epel_uni[8][0][0] = hevc_put_uni_pel_pixels96_8_avx2;
1087 c->put_hevc_epel_uni[9][0][0] = hevc_put_uni_pel_pixels128_8_avx2;
1089 c->put_hevc_qpel_uni[5][0][0] = hevc_put_uni_pel_pixels32_8_avx2;
1090 c->put_hevc_qpel_uni[6][0][0] = hevc_put_uni_pel_pixels48_8_avx2;
1091 c->put_hevc_qpel_uni[7][0][0] = hevc_put_uni_pel_pixels64_8_avx2;
1092 c->put_hevc_qpel_uni[8][0][0] = hevc_put_uni_pel_pixels96_8_avx2;
1093 c->put_hevc_qpel_uni[9][0][0] = hevc_put_uni_pel_pixels128_8_avx2;
/*
 * The bi tables reference ff_hevc_put_bi_* symbols directly, whereas the
 * put/uni entries above use hevc_put_* names (the prefix produced by the
 * FW_PUT / FW_PUT_UNI wrapper macros near the top of the file).
 */
1095 c->put_hevc_epel_bi[5][0][0] = ff_hevc_put_bi_pel_pixels16_10_avx2;
1096 c->put_hevc_epel_bi[6][0][0] = ff_hevc_put_bi_pel_pixels24_10_avx2;
1097 c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_bi_pel_pixels32_10_avx2;
1098 c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_bi_pel_pixels48_10_avx2;
1099 c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_bi_pel_pixels64_10_avx2;
1100 c->put_hevc_qpel_bi[5][0][0] = ff_hevc_put_bi_pel_pixels16_10_avx2;
1101 c->put_hevc_qpel_bi[6][0][0] = ff_hevc_put_bi_pel_pixels24_10_avx2;
1102 c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_bi_pel_pixels32_10_avx2;
1103 c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_bi_pel_pixels48_10_avx2;
1104 c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_bi_pel_pixels64_10_avx2;
/*
 * 10-bit AVX2 epel entries for widths 16, 24, 32, 48, 64 (first index 5..9).
 * The last two indices select the variant: [0][1] -> *_h, [1][0] -> *_v,
 * [1][1] -> *_hv.  For each variant the put, uni and bi tables are filled in
 * turn; the bi table uses ff_-prefixed symbols, the others hevc_put_* names.
 */
/* epel *_h variant: [n][0][1] */
1106 c->put_hevc_epel[5][0][1] = hevc_put_epel_h16_10_avx2;
1107 c->put_hevc_epel[6][0][1] = hevc_put_epel_h24_10_avx2;
1108 c->put_hevc_epel[7][0][1] = hevc_put_epel_h32_10_avx2;
1109 c->put_hevc_epel[8][0][1] = hevc_put_epel_h48_10_avx2;
1110 c->put_hevc_epel[9][0][1] = hevc_put_epel_h64_10_avx2;
1112 c->put_hevc_epel_uni[5][0][1] = hevc_put_uni_epel_h16_10_avx2;
1113 c->put_hevc_epel_uni[6][0][1] = hevc_put_uni_epel_h24_10_avx2;
1114 c->put_hevc_epel_uni[7][0][1] = hevc_put_uni_epel_h32_10_avx2;
1115 c->put_hevc_epel_uni[8][0][1] = hevc_put_uni_epel_h48_10_avx2;
1116 c->put_hevc_epel_uni[9][0][1] = hevc_put_uni_epel_h64_10_avx2;
1118 c->put_hevc_epel_bi[5][0][1] = ff_hevc_put_bi_epel_h16_10_avx2;
1119 c->put_hevc_epel_bi[6][0][1] = ff_hevc_put_bi_epel_h24_10_avx2;
1120 c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_bi_epel_h32_10_avx2;
1121 c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_bi_epel_h48_10_avx2;
1122 c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_bi_epel_h64_10_avx2;
/* epel *_v variant: [n][1][0] */
1124 c->put_hevc_epel[5][1][0] = hevc_put_epel_v16_10_avx2;
1125 c->put_hevc_epel[6][1][0] = hevc_put_epel_v24_10_avx2;
1126 c->put_hevc_epel[7][1][0] = hevc_put_epel_v32_10_avx2;
1127 c->put_hevc_epel[8][1][0] = hevc_put_epel_v48_10_avx2;
1128 c->put_hevc_epel[9][1][0] = hevc_put_epel_v64_10_avx2;
1130 c->put_hevc_epel_uni[5][1][0] = hevc_put_uni_epel_v16_10_avx2;
1131 c->put_hevc_epel_uni[6][1][0] = hevc_put_uni_epel_v24_10_avx2;
1132 c->put_hevc_epel_uni[7][1][0] = hevc_put_uni_epel_v32_10_avx2;
1133 c->put_hevc_epel_uni[8][1][0] = hevc_put_uni_epel_v48_10_avx2;
1134 c->put_hevc_epel_uni[9][1][0] = hevc_put_uni_epel_v64_10_avx2;
1136 c->put_hevc_epel_bi[5][1][0] = ff_hevc_put_bi_epel_v16_10_avx2;
1137 c->put_hevc_epel_bi[6][1][0] = ff_hevc_put_bi_epel_v24_10_avx2;
1138 c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_bi_epel_v32_10_avx2;
1139 c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_bi_epel_v48_10_avx2;
1140 c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_bi_epel_v64_10_avx2;
/* epel *_hv variant: [n][1][1] */
1142 c->put_hevc_epel[5][1][1] = hevc_put_epel_hv16_10_avx2;
1143 c->put_hevc_epel[6][1][1] = hevc_put_epel_hv24_10_avx2;
1144 c->put_hevc_epel[7][1][1] = hevc_put_epel_hv32_10_avx2;
1145 c->put_hevc_epel[8][1][1] = hevc_put_epel_hv48_10_avx2;
1146 c->put_hevc_epel[9][1][1] = hevc_put_epel_hv64_10_avx2;
1148 c->put_hevc_epel_uni[5][1][1] = hevc_put_uni_epel_hv16_10_avx2;
1149 c->put_hevc_epel_uni[6][1][1] = hevc_put_uni_epel_hv24_10_avx2;
1150 c->put_hevc_epel_uni[7][1][1] = hevc_put_uni_epel_hv32_10_avx2;
1151 c->put_hevc_epel_uni[8][1][1] = hevc_put_uni_epel_hv48_10_avx2;
1152 c->put_hevc_epel_uni[9][1][1] = hevc_put_uni_epel_hv64_10_avx2;
1154 c->put_hevc_epel_bi[5][1][1] = ff_hevc_put_bi_epel_hv16_10_avx2;
1155 c->put_hevc_epel_bi[6][1][1] = ff_hevc_put_bi_epel_hv24_10_avx2;
1156 c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_bi_epel_hv32_10_avx2;
1157 c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_bi_epel_hv48_10_avx2;
1158 c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_bi_epel_hv64_10_avx2;
/*
 * 10-bit AVX2 qpel entries for widths 16, 24, 32, 48, 64 (first index 5..9).
 * The last two indices select the variant: [0][1] -> *_h, [1][0] -> *_v,
 * [1][1] -> *_hv.  For each variant the put, uni and bi tables are filled in
 * turn; the bi table uses ff_-prefixed symbols, the others hevc_put_* names.
 */
/* qpel *_h variant: [n][0][1] */
1160 c->put_hevc_qpel[5][0][1] = hevc_put_qpel_h16_10_avx2;
1161 c->put_hevc_qpel[6][0][1] = hevc_put_qpel_h24_10_avx2;
1162 c->put_hevc_qpel[7][0][1] = hevc_put_qpel_h32_10_avx2;
1163 c->put_hevc_qpel[8][0][1] = hevc_put_qpel_h48_10_avx2;
1164 c->put_hevc_qpel[9][0][1] = hevc_put_qpel_h64_10_avx2;
1166 c->put_hevc_qpel_uni[5][0][1] = hevc_put_uni_qpel_h16_10_avx2;
1167 c->put_hevc_qpel_uni[6][0][1] = hevc_put_uni_qpel_h24_10_avx2;
1168 c->put_hevc_qpel_uni[7][0][1] = hevc_put_uni_qpel_h32_10_avx2;
1169 c->put_hevc_qpel_uni[8][0][1] = hevc_put_uni_qpel_h48_10_avx2;
1170 c->put_hevc_qpel_uni[9][0][1] = hevc_put_uni_qpel_h64_10_avx2;
1172 c->put_hevc_qpel_bi[5][0][1] = ff_hevc_put_bi_qpel_h16_10_avx2;
1173 c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_bi_qpel_h24_10_avx2;
1174 c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_bi_qpel_h32_10_avx2;
1175 c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_bi_qpel_h48_10_avx2;
1176 c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_bi_qpel_h64_10_avx2;
/* qpel *_v variant: [n][1][0] */
1178 c->put_hevc_qpel[5][1][0] = hevc_put_qpel_v16_10_avx2;
1179 c->put_hevc_qpel[6][1][0] = hevc_put_qpel_v24_10_avx2;
1180 c->put_hevc_qpel[7][1][0] = hevc_put_qpel_v32_10_avx2;
1181 c->put_hevc_qpel[8][1][0] = hevc_put_qpel_v48_10_avx2;
1182 c->put_hevc_qpel[9][1][0] = hevc_put_qpel_v64_10_avx2;
1184 c->put_hevc_qpel_uni[5][1][0] = hevc_put_uni_qpel_v16_10_avx2;
1185 c->put_hevc_qpel_uni[6][1][0] = hevc_put_uni_qpel_v24_10_avx2;
1186 c->put_hevc_qpel_uni[7][1][0] = hevc_put_uni_qpel_v32_10_avx2;
1187 c->put_hevc_qpel_uni[8][1][0] = hevc_put_uni_qpel_v48_10_avx2;
1188 c->put_hevc_qpel_uni[9][1][0] = hevc_put_uni_qpel_v64_10_avx2;
1190 c->put_hevc_qpel_bi[5][1][0] = ff_hevc_put_bi_qpel_v16_10_avx2;
1191 c->put_hevc_qpel_bi[6][1][0] = ff_hevc_put_bi_qpel_v24_10_avx2;
1192 c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_bi_qpel_v32_10_avx2;
1193 c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_bi_qpel_v48_10_avx2;
1194 c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_bi_qpel_v64_10_avx2;
/* qpel *_hv variant: [n][1][1] */
1196 c->put_hevc_qpel[5][1][1] = hevc_put_qpel_hv16_10_avx2;
1197 c->put_hevc_qpel[6][1][1] = hevc_put_qpel_hv24_10_avx2;
1198 c->put_hevc_qpel[7][1][1] = hevc_put_qpel_hv32_10_avx2;
1199 c->put_hevc_qpel[8][1][1] = hevc_put_qpel_hv48_10_avx2;
1200 c->put_hevc_qpel[9][1][1] = hevc_put_qpel_hv64_10_avx2;
1202 c->put_hevc_qpel_uni[5][1][1] = hevc_put_uni_qpel_hv16_10_avx2;
1203 c->put_hevc_qpel_uni[6][1][1] = hevc_put_uni_qpel_hv24_10_avx2;
1204 c->put_hevc_qpel_uni[7][1][1] = hevc_put_uni_qpel_hv32_10_avx2;
1205 c->put_hevc_qpel_uni[8][1][1] = hevc_put_uni_qpel_hv48_10_avx2;
1206 c->put_hevc_qpel_uni[9][1][1] = hevc_put_uni_qpel_hv64_10_avx2;
1208 c->put_hevc_qpel_bi[5][1][1] = ff_hevc_put_bi_qpel_hv16_10_avx2;
1209 c->put_hevc_qpel_bi[6][1][1] = ff_hevc_put_bi_qpel_hv24_10_avx2;
1210 c->put_hevc_qpel_bi[7][1][1] = ff_hevc_put_bi_qpel_hv32_10_avx2;
1211 c->put_hevc_qpel_bi[8][1][1] = ff_hevc_put_bi_qpel_hv48_10_avx2;
1212 c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_bi_qpel_hv64_10_avx2;
/*
 * 12-bit SSE2 loop-filter entry points (chroma and luma, v and h variants).
 * NOTE(review): the bit-depth and CPU-feature checks guarding this section
 * are not visible in this excerpt.
 */
1224 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2;
1225 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2;
1227 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_sse2;
1228 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2;
/* 12-bit SSE2 idct_dc entries; slots 0..3 take 4x4, 8x8, 16x16, 32x32. */
1233 c->idct_dc[0] = ff_hevc_idct_4x4_dc_12_sse2;
1234 c->idct_dc[1] = ff_hevc_idct_8x8_dc_12_sse2;
1235 c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_sse2;
1236 c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_sse2;
/*
 * 12-bit SSSE3 luma loop filter, written to the same two pointers as the
 * SSE2 luma functions above; when both assignments execute, the SSSE3
 * functions are the ones left installed.
 */
1240 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3;
1241 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_ssse3;
/*
 * 12-bit SSE4 registrations, compiled only when HAVE_SSE4_EXTERNAL and
 * ARCH_X86_64 are both true (the matching #endif is outside this excerpt).
 *
 * EPEL_LINKS / QPEL_LINKS are macros whose definitions are not visible here;
 * each call passes the target table, two index arguments, a function-name
 * stem, the bit depth (12) and the ISA suffix (sse4).  The index pair follows
 * the stem: (0, 0) pel_pixels, (0, 1) *_h, (1, 0) *_v, (1, 1) *_hv.
 * NOTE(review): presumably each macro call fills one table slot per block
 * width -- confirm against the macro definitions.
 */
1244 #if HAVE_SSE4_EXTERNAL && ARCH_X86_64
1246 EPEL_LINKS(
c->put_hevc_epel, 0, 0, pel_pixels, 12, sse4);
1247 EPEL_LINKS(
c->put_hevc_epel, 0, 1, epel_h, 12, sse4);
1248 EPEL_LINKS(
c->put_hevc_epel, 1, 0, epel_v, 12, sse4);
1249 EPEL_LINKS(
c->put_hevc_epel, 1, 1, epel_hv, 12, sse4);
1251 QPEL_LINKS(
c->put_hevc_qpel, 0, 0, pel_pixels, 12, sse4);
1252 QPEL_LINKS(
c->put_hevc_qpel, 0, 1, qpel_h, 12, sse4);
1253 QPEL_LINKS(
c->put_hevc_qpel, 1, 0, qpel_v, 12, sse4);
1254 QPEL_LINKS(
c->put_hevc_qpel, 1, 1, qpel_hv, 12, sse4);
/*
 * 12-bit AVX versions of the chroma and luma loop-filter entry points
 * (v and h variants of each).
 * NOTE(review): the CPU-feature / architecture checks that guard these
 * assignments are not visible in this excerpt.
 */
1258 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_avx;
1259 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_avx;
1261 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_avx;
1262 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_avx;
/*
 * Slot 0 of sao_band_filter takes the AVX2 function with the "_8_12" suffix
 * (presumably block width 8 at 12-bit depth -- confirm against its
 * declaration).
 */
1267 c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_12_avx2;
/*
 * idct_dc slots 2 and 3 (16x16, 32x32) receive the 12-bit AVX2 functions;
 * only these two slots are assigned here.
 */
1270 c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_avx2;
1271 c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_avx2;