FFmpeg
hevc_pel.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015 Henrik Gramner
3  * Copyright (c) 2021 Josh Dekker
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License along
18  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21 
22 #include <string.h>
23 #include "checkasm.h"
24 #include "libavcodec/hevc/dsp.h"
25 #include "libavutil/common.h"
26 #include "libavutil/internal.h"
27 #include "libavutil/intreadwrite.h"
28 
/* AND masks applied to each random 32-bit word by randomize_buffers();
 * indexed by bit_depth - 8 (one entry per tested bit depth, 8..12). */
29 static const uint32_t pixel_mask[] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff, 0x07ff07ff, 0x0fff0fff };
/* AND masks applied by randomize_buffers_ref() when filling ref0/ref1;
 * also indexed by bit_depth - 8. */
30 static const uint32_t pixel_mask16[] = { 0x00ff00ff, 0x01ff01ff, 0x03ff03ff, 0x07ff07ff, 0x0fff0fff };
/* Block dimensions under test. Entry 0 is a -1 placeholder: the test loops
 * index this table with size = 1..9 only. */
31 static const int sizes[] = { -1, 4, 6, 8, 12, 16, 24, 32, 48, 64 };
/* Parameter sets for the weighted tests. Each list ends with a -1 sentinel;
 * the test loops walk a list while the current value is >= 0. */
32 static const int weights[] = { 0, 128, 255, -1 };
33 static const int denoms[] = {0, 7, 12, -1 };
34 static const int offsets[] = {0, 255, -1 };
35 
/* Bytes per pixel for the current bit_depth: 1 for 8, 2 for 9..16. */
36 #define SIZEOF_PIXEL ((bit_depth + 7) / 8)
/* Element count used for the buf, dst and ref arrays in the tests below. */
37 #define BUF_SIZE (2 * MAX_PB_SIZE * (2 * 4 + MAX_PB_SIZE))
38 
/*
 * Fill buf0 and buf1 with identical random data, one 32-bit store at a time,
 * over BUF_SIZE + SRC_EXTRA bytes. Each random word is ANDed with the
 * pixel_mask entry for the current bit depth.
 *
 * Expects bit_depth, buf0 and buf1 to be in scope at the point of expansion,
 * and uses whichever SRC_EXTRA definition is active there.
 */
39 #define randomize_buffers() \
40  do { \
41  uint32_t mask = pixel_mask[bit_depth - 8]; \
42  int k; \
43  for (k = 0; k < BUF_SIZE + SRC_EXTRA; k += 4) { \
44  uint32_t r = rnd() & mask; \
45  AV_WN32A(buf0 + k, r); \
46  AV_WN32A(buf1 + k, r); \
47  } \
48  } while (0)
49 
/*
 * Fill dst0 and dst1 with identical, unmasked random 32-bit words, advancing
 * the offset by 4 per store up to BUF_SIZE. Expects dst0 and dst1 to be in
 * scope at the point of expansion.
 */
50 #define randomize_buffers_dst() \
51  do { \
52  int k; \
53  for (k = 0; k < BUF_SIZE; k += 4) { \
54  uint32_t r = rnd(); \
55  AV_WN32A(dst0 + k, r); \
56  AV_WN32A(dst1 + k, r); \
57  } \
58  } while (0)
59 
/*
 * Randomize the source buffers (via randomize_buffers()) and then fill ref0
 * and ref1 with identical random 32-bit words, each ANDed with the
 * pixel_mask16 entry for the current bit depth.
 *
 * Expects bit_depth, ref0 and ref1 (plus everything randomize_buffers()
 * needs) to be in scope at the point of expansion.
 *
 * The whole expansion is a single do { } while (0) statement, so the macro
 * behaves as one statement after an unbraced if/else or loop header. The
 * locals carry a ref_ prefix so they do not shadow the ones declared by the
 * nested randomize_buffers() expansion.
 */
#define randomize_buffers_ref()                                \
    do {                                                       \
        uint32_t ref_mask = pixel_mask16[bit_depth - 8];       \
        int ref_k;                                             \
        randomize_buffers();                                   \
        /* ref_k advances by 2 elements per 32-bit store */    \
        for (ref_k = 0; ref_k < BUF_SIZE; ref_k += 2) {        \
            uint32_t ref_r = rnd() & ref_mask;                 \
            AV_WN32A(ref0 + ref_k, ref_r);                     \
            AV_WN32A(ref1 + ref_k, ref_r);                     \
        }                                                      \
    } while (0)
71 
/* Source pointers handed to the tested functions: they start
 * 2 * 4 * MAX_PB_SIZE bytes into buf0/buf1 rather than at the buffer base. */
72 #define src0 (buf0 + 2 * 4 * MAX_PB_SIZE) /* hevc qpel functions read data from negative src pointer offsets */
73 #define src1 (buf1 + 2 * 4 * MAX_PB_SIZE)
74 
/* Extra bytes appended to buf0/buf1 (and randomized by randomize_buffers())
 * in the qpel tests that follow. */
75 /* FIXME: Does the need for SRC_EXTRA for these tests indicate a bug? */
76 #define SRC_EXTRA 8
77 
/*
 * Check every h.put_hevc_qpel[size][j][i] entry for bit depths 8..12, with
 * i and j each 0 or 1 (passed as the mx and my arguments) and size indexing
 * sizes[1..9]. The block dimension sizes[size] is used for the source stride
 * (times SIZEOF_PIXEL), the height and the width.
 *
 * The reference and tested versions write int16_t output through dst0/dst1;
 * the results are compared with checkasm_check() and the tested version is
 * then benchmarked. Results are reported under the name "qpel".
 *
 * NOTE(review): the embedded line numbering skips 85, 91 and 107-108, so this
 * extract appears to omit some statements (no declaration or initialisation
 * of `h` is visible here) - confirm against the upstream file.
 */
78 static void checkasm_check_hevc_qpel(void)
79 {
80  LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]);
81  LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]);
82  LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
83  LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
84 
86  int size, bit_depth, i, j;
87  declare_func(void, int16_t *dst, const uint8_t *src, ptrdiff_t srcstride,
88  int height, intptr_t mx, intptr_t my, int width);
89 
90  for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
92 
93  for (i = 0; i < 2; i++) {
94  for (j = 0; j < 2; j++) {
95  for (size = 1; size < 10; size++) {
96  const char *type;
97  switch ((j << 1) | i) {
98  case 0: type = "pel_pixels"; break; // 0 0
99  case 1: type = "qpel_h"; break; // 0 1
100  case 2: type = "qpel_v"; break; // 1 0
101  case 3: type = "qpel_hv"; break; // 1 1
102  }
103 
104  if (check_func(h.put_hevc_qpel[size][j][i],
105  "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
106  int16_t *dstw0 = (int16_t *) dst0, *dstw1 = (int16_t *) dst1;
109  call_ref(dstw0, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
110  call_new(dstw1, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
111  checkasm_check(int16_t, dstw0, MAX_PB_SIZE * sizeof(int16_t),
112  dstw1, MAX_PB_SIZE * sizeof(int16_t),
113  size[sizes], size[sizes], "dst");
114  bench_new(dstw1, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
115  }
116  }
117  }
118  }
119  }
120  report("qpel");
121 }
122 
/*
 * Body of checkasm_check_hevc_qpel_uni(); the signature line (123 in the
 * embedded numbering) is not present in this extract.
 *
 * Checks every h.put_hevc_qpel_uni[size][j][i] entry for bit depths 8..12,
 * i and j each 0 or 1, and size indexing sizes[1..9]. dst0/dst1 are declared
 * with PIXEL_RECT(…, 64, 64), cleared before each run and compared with
 * checkasm_check_pixel_padded(). Results are reported as "qpel_uni".
 *
 * NOTE(review): the embedded numbering has gaps (130, 136, 151, 155, 158,
 * 164) and the call_ref/call_new/bench_new argument lists show no source
 * pointer or stride, so lines appear to be missing - confirm upstream.
 */
124 {
125  LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]);
126  LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]);
127  PIXEL_RECT(dst0, 64, 64);
128  PIXEL_RECT(dst1, 64, 64);
129 
131  int size, bit_depth, i, j;
132  declare_func(void, uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride,
133  int height, intptr_t mx, intptr_t my, int width);
134 
135  for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
137 
138  for (i = 0; i < 2; i++) {
139  for (j = 0; j < 2; j++) {
140  for (size = 1; size < 10; size++) {
141  const char *type;
142  switch ((j << 1) | i) {
143  case 0: type = "pel_uni_pixels"; break; // 0 0
144  case 1: type = "qpel_uni_h"; break; // 0 1
145  case 2: type = "qpel_uni_v"; break; // 1 0
146  case 3: type = "qpel_uni_hv"; break; // 1 1
147  }
148 
149  if (check_func(h.put_hevc_qpel_uni[size][j][i],
150  "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
152  CLEAR_PIXEL_RECT(dst0);
153  CLEAR_PIXEL_RECT(dst1);
154  call_ref(dst0, dst0_stride,
156  sizes[size], i, j, sizes[size]);
157  call_new(dst1, dst1_stride,
159  sizes[size], i, j, sizes[size]);
160  checkasm_check_pixel_padded(dst0, dst0_stride,
161  dst1, dst1_stride,
162  size[sizes], size[sizes], "dst");
163  bench_new(dst1, dst1_stride,
165  sizes[size], i, j, sizes[size]);
166  }
167  }
168  }
169  }
170  }
171  report("qpel_uni");
172 }
173 
/*
 * Body of checkasm_check_hevc_qpel_uni_w(); the signature line (174 in the
 * embedded numbering) is not present in this extract.
 *
 * Checks every h.put_hevc_qpel_uni_w[size][j][i] entry for bit depths 8..12,
 * i and j each 0 or 1, and size indexing sizes[1..9]. For each function,
 * every combination of denoms[], weights[] and offsets[] is tried (each list
 * is walked until its -1 sentinel) and passed as the denom, wx and ox
 * arguments. Outputs are compared with checkasm_check_pixel_padded() and
 * results are reported as "qpel_uni_w".
 *
 * NOTE(review): the embedded numbering has gaps (e.g. 181, 188, 206, 210) and
 * the call argument lists show no source pointer or stride, so lines appear
 * to be missing - confirm upstream.
 */
175 {
176  LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]);
177  LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]);
178  PIXEL_RECT(dst0, 64, 64);
179  PIXEL_RECT(dst1, 64, 64);
180 
182  int size, bit_depth, i, j;
183  const int *denom, *wx, *ox;
184  declare_func(void, uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride,
185  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width);
186 
187  for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
189 
190  for (i = 0; i < 2; i++) {
191  for (j = 0; j < 2; j++) {
192  for (size = 1; size < 10; size++) {
193  const char *type;
194  switch ((j << 1) | i) {
195  case 0: type = "pel_uni_w_pixels"; break; // 0 0
196  case 1: type = "qpel_uni_w_h"; break; // 0 1
197  case 2: type = "qpel_uni_w_v"; break; // 1 0
198  case 3: type = "qpel_uni_w_hv"; break; // 1 1
199  }
200 
201  if (check_func(h.put_hevc_qpel_uni_w[size][j][i],
202  "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
203  for (denom = denoms; *denom >= 0; denom++) {
204  for (wx = weights; *wx >= 0; wx++) {
205  for (ox = offsets; *ox >= 0; ox++) {
207  CLEAR_PIXEL_RECT(dst0);
208  CLEAR_PIXEL_RECT(dst1);
209  call_ref(dst0, dst0_stride,
211  sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
212  call_new(dst1, dst1_stride,
214  sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
215  checkasm_check_pixel_padded(dst0, dst0_stride,
216  dst1, dst1_stride,
217  size[sizes], size[sizes], "dst");
218  bench_new(dst1, dst1_stride,
220  sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
221  }
222  }
223  }
224  }
225  }
226  }
227  }
228  }
229  report("qpel_uni_w");
230 }
231 
/*
 * Body of checkasm_check_hevc_qpel_bi(); the signature line (232 in the
 * embedded numbering) is not present in this extract.
 *
 * Checks every h.put_hevc_qpel_bi[size][j][i] entry for bit depths 8..12,
 * i and j each 0 or 1, and size indexing sizes[1..9]. In addition to the
 * pixel source, the functions take an int16_t second source (src2), supplied
 * from ref0 for the reference call and ref1 for the tested call. Outputs are
 * compared with checkasm_check_pixel_padded() and reported as "qpel_bi".
 *
 * NOTE(review): the embedded numbering has gaps (e.g. 241, 248, 263, 267) and
 * the call argument lists show no source pointer or stride, so lines appear
 * to be missing - confirm upstream.
 */
233 {
234  LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]);
235  LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]);
236  PIXEL_RECT(dst0, 64, 64);
237  PIXEL_RECT(dst1, 64, 64);
238  LOCAL_ALIGNED_32(int16_t, ref0, [BUF_SIZE]);
239  LOCAL_ALIGNED_32(int16_t, ref1, [BUF_SIZE]);
240 
242  int size, bit_depth, i, j;
243  declare_func(void, uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride,
244  const int16_t *src2,
245  int height, intptr_t mx, intptr_t my, int width);
246 
247  for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
249 
250  for (i = 0; i < 2; i++) {
251  for (j = 0; j < 2; j++) {
252  for (size = 1; size < 10; size++) {
253  const char *type;
254  switch ((j << 1) | i) {
255  case 0: type = "pel_bi_pixels"; break; // 0 0
256  case 1: type = "qpel_bi_h"; break; // 0 1
257  case 2: type = "qpel_bi_v"; break; // 1 0
258  case 3: type = "qpel_bi_hv"; break; // 1 1
259  }
260 
261  if (check_func(h.put_hevc_qpel_bi[size][j][i],
262  "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
264  CLEAR_PIXEL_RECT(dst0);
265  CLEAR_PIXEL_RECT(dst1);
266  call_ref(dst0, dst0_stride,
268  ref0, sizes[size], i, j, sizes[size]);
269  call_new(dst1, dst1_stride,
271  ref1, sizes[size], i, j, sizes[size]);
272  checkasm_check_pixel_padded(dst0, dst0_stride,
273  dst1, dst1_stride,
274  size[sizes], size[sizes], "dst");
275  bench_new(dst1, dst1_stride,
277  ref1, sizes[size], i, j, sizes[size]);
278  }
279  }
280  }
281  }
282  }
283  report("qpel_bi");
284 }
285 
/*
 * Body of checkasm_check_hevc_qpel_bi_w(); the signature line (286 in the
 * embedded numbering) is not present in this extract.
 *
 * Checks every h.put_hevc_qpel_bi_w[size][j][i] entry for bit depths 8..12,
 * i and j each 0 or 1, and size indexing sizes[1..9]. Every combination of
 * denoms[], weights[] and offsets[] is tried (each list is walked until its
 * -1 sentinel); the same weight is passed for both wx0 and wx1 and the same
 * offset for both ox0 and ox1. ref0/ref1 supply the int16_t src2 argument.
 * Outputs are compared with checkasm_check_pixel_padded() and reported as
 * "qpel_bi_w".
 *
 * NOTE(review): the embedded numbering has gaps (e.g. 295, 304, 322, 326) and
 * the call argument lists show no source pointer or stride, so lines appear
 * to be missing - confirm upstream.
 */
287 {
288  LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]);
289  LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]);
290  PIXEL_RECT(dst0, 64, 64);
291  PIXEL_RECT(dst1, 64, 64);
292  LOCAL_ALIGNED_32(int16_t, ref0, [BUF_SIZE]);
293  LOCAL_ALIGNED_32(int16_t, ref1, [BUF_SIZE]);
294 
296  int size, bit_depth, i, j;
297  const int *denom, *wx, *ox;
298  declare_func(void, uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride,
299  const int16_t *src2,
300  int height, int denom, int wx0, int wx1,
301  int ox0, int ox1, intptr_t mx, intptr_t my, int width);
302 
303  for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
305 
306  for (i = 0; i < 2; i++) {
307  for (j = 0; j < 2; j++) {
308  for (size = 1; size < 10; size++) {
309  const char *type;
310  switch ((j << 1) | i) {
311  case 0: type = "pel_bi_w_pixels"; break; // 0 0
312  case 1: type = "qpel_bi_w_h"; break; // 0 1
313  case 2: type = "qpel_bi_w_v"; break; // 1 0
314  case 3: type = "qpel_bi_w_hv"; break; // 1 1
315  }
316 
317  if (check_func(h.put_hevc_qpel_bi_w[size][j][i],
318  "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
319  for (denom = denoms; *denom >= 0; denom++) {
320  for (wx = weights; *wx >= 0; wx++) {
321  for (ox = offsets; *ox >= 0; ox++) {
323  CLEAR_PIXEL_RECT(dst0);
324  CLEAR_PIXEL_RECT(dst1);
325  call_ref(dst0, dst0_stride,
327  ref0, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
328  call_new(dst1, dst1_stride,
330  ref1, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
331  checkasm_check_pixel_padded(dst0, dst0_stride,
332  dst1, dst1_stride,
333  size[sizes], size[sizes], "dst");
334  bench_new(dst1, dst1_stride,
336  ref1, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
337  }
338  }
339  }
340  }
341  }
342  }
343  }
344  }
345  report("qpel_bi_w");
346 }
347 
/* From here on randomize_buffers() covers exactly BUF_SIZE bytes: the tests
 * that follow declare buf0/buf1 with BUF_SIZE elements and no extra tail. */
348 #undef SRC_EXTRA
349 #define SRC_EXTRA 0
350 
/*
 * Check every h.put_hevc_epel[size][j][i] entry for bit depths 8..12, with
 * i and j each 0 or 1 (passed as the mx and my arguments) and size indexing
 * sizes[1..9]. The block dimension sizes[size] is used for the source stride
 * (times SIZEOF_PIXEL), the height and the width.
 *
 * The reference and tested versions write int16_t output through dst0/dst1;
 * the results are compared with checkasm_check() and the tested version is
 * then benchmarked. Results are reported under the name "epel".
 *
 * NOTE(review): the embedded line numbering skips 358, 364 and 380-381, so
 * this extract appears to omit some statements (no declaration or
 * initialisation of `h` is visible here) - confirm against the upstream file.
 */
351 static void checkasm_check_hevc_epel(void)
352 {
353  LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
354  LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
355  LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
356  LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
357 
359  int size, bit_depth, i, j;
360  declare_func(void, int16_t *dst, const uint8_t *src, ptrdiff_t srcstride,
361  int height, intptr_t mx, intptr_t my, int width);
362 
363  for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
365 
366  for (i = 0; i < 2; i++) {
367  for (j = 0; j < 2; j++) {
368  for (size = 1; size < 10; size++) {
369  const char *type;
370  switch ((j << 1) | i) {
371  case 0: type = "pel_pixels"; break; // 0 0
372  case 1: type = "epel_h"; break; // 0 1
373  case 2: type = "epel_v"; break; // 1 0
374  case 3: type = "epel_hv"; break; // 1 1
375  }
376 
377  if (check_func(h.put_hevc_epel[size][j][i],
378  "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
379  int16_t *dstw0 = (int16_t *) dst0, *dstw1 = (int16_t *) dst1;
382  call_ref(dstw0, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
383  call_new(dstw1, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
384  checkasm_check(int16_t, dstw0, MAX_PB_SIZE * sizeof(int16_t),
385  dstw1, MAX_PB_SIZE * sizeof(int16_t),
386  size[sizes], size[sizes], "dst");
387  bench_new(dstw1, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
388  }
389  }
390  }
391  }
392  }
393  report("epel");
394 }
395 
/*
 * Body of checkasm_check_hevc_epel_uni(); the signature line (396 in the
 * embedded numbering) is not present in this extract.
 *
 * Checks every h.put_hevc_epel_uni[size][j][i] entry for bit depths 8..12,
 * i and j each 0 or 1, and size indexing sizes[1..9]. dst0/dst1 are declared
 * with PIXEL_RECT(…, 64, 64), cleared before each run and compared with
 * checkasm_check_pixel_padded(). Results are reported as "epel_uni".
 *
 * NOTE(review): the embedded numbering has gaps (e.g. 403, 409, 424, 428) and
 * the call argument lists show no source pointer or stride, so lines appear
 * to be missing - confirm upstream.
 */
397 {
398  LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
399  LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
400  PIXEL_RECT(dst0, 64, 64);
401  PIXEL_RECT(dst1, 64, 64);
402 
404  int size, bit_depth, i, j;
405  declare_func(void, uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride,
406  int height, intptr_t mx, intptr_t my, int width);
407 
408  for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
410 
411  for (i = 0; i < 2; i++) {
412  for (j = 0; j < 2; j++) {
413  for (size = 1; size < 10; size++) {
414  const char *type;
415  switch ((j << 1) | i) {
416  case 0: type = "pel_uni_pixels"; break; // 0 0
417  case 1: type = "epel_uni_h"; break; // 0 1
418  case 2: type = "epel_uni_v"; break; // 1 0
419  case 3: type = "epel_uni_hv"; break; // 1 1
420  }
421 
422  if (check_func(h.put_hevc_epel_uni[size][j][i],
423  "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
425  CLEAR_PIXEL_RECT(dst0);
426  CLEAR_PIXEL_RECT(dst1);
427  call_ref(dst0, dst0_stride,
429  sizes[size], i, j, sizes[size]);
430  call_new(dst1, dst1_stride,
432  sizes[size], i, j, sizes[size]);
433  checkasm_check_pixel_padded(dst0, dst0_stride,
434  dst1, dst1_stride,
435  size[sizes], size[sizes], "dst");
436  bench_new(dst1, dst1_stride,
438  sizes[size], i, j, sizes[size]);
439  }
440  }
441  }
442  }
443  }
444  report("epel_uni");
445 }
446 
/*
 * Body of checkasm_check_hevc_epel_uni_w(); the signature line (447 in the
 * embedded numbering) is not present in this extract.
 *
 * Checks every h.put_hevc_epel_uni_w[size][j][i] entry for bit depths 8..12,
 * i and j each 0 or 1, and size indexing sizes[1..9]. For each function,
 * every combination of denoms[], weights[] and offsets[] is tried (each list
 * is walked until its -1 sentinel) and passed as the denom, wx and ox
 * arguments. Outputs are compared with checkasm_check_pixel_padded() and
 * results are reported as "epel_uni_w".
 *
 * NOTE(review): the embedded numbering has gaps (e.g. 454, 461, 479, 483) and
 * the call argument lists show no source pointer or stride, so lines appear
 * to be missing - confirm upstream.
 */
448 {
449  LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
450  LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
451  PIXEL_RECT(dst0, 64, 64);
452  PIXEL_RECT(dst1, 64, 64);
453 
455  int size, bit_depth, i, j;
456  const int *denom, *wx, *ox;
457  declare_func(void, uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride,
458  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width);
459 
460  for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
462 
463  for (i = 0; i < 2; i++) {
464  for (j = 0; j < 2; j++) {
465  for (size = 1; size < 10; size++) {
466  const char *type;
467  switch ((j << 1) | i) {
468  case 0: type = "pel_uni_w_pixels"; break; // 0 0
469  case 1: type = "epel_uni_w_h"; break; // 0 1
470  case 2: type = "epel_uni_w_v"; break; // 1 0
471  case 3: type = "epel_uni_w_hv"; break; // 1 1
472  }
473 
474  if (check_func(h.put_hevc_epel_uni_w[size][j][i],
475  "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
476  for (denom = denoms; *denom >= 0; denom++) {
477  for (wx = weights; *wx >= 0; wx++) {
478  for (ox = offsets; *ox >= 0; ox++) {
480  CLEAR_PIXEL_RECT(dst0);
481  CLEAR_PIXEL_RECT(dst1);
482  call_ref(dst0, dst0_stride,
484  sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
485  call_new(dst1, dst1_stride,
487  sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
488  checkasm_check_pixel_padded(dst0, dst0_stride,
489  dst1, dst1_stride,
490  size[sizes], size[sizes], "dst");
491  bench_new(dst1, dst1_stride,
493  sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
494  }
495  }
496  }
497  }
498  }
499  }
500  }
501  }
502  report("epel_uni_w");
503 }
504 
/*
 * Body of checkasm_check_hevc_epel_bi(); the signature line (505 in the
 * embedded numbering) is not present in this extract.
 *
 * Checks every h.put_hevc_epel_bi[size][j][i] entry for bit depths 8..12,
 * i and j each 0 or 1, and size indexing sizes[1..9]. In addition to the
 * pixel source, the functions take an int16_t second source (src2), supplied
 * from ref0 for the reference call and ref1 for the tested call. Outputs are
 * compared with checkasm_check_pixel_padded() and reported as "epel_bi".
 *
 * NOTE(review): the embedded numbering has gaps (e.g. 514, 521, 536, 540) and
 * the call argument lists show no source pointer or stride, so lines appear
 * to be missing - confirm upstream.
 */
506 {
507  LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
508  LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
509  PIXEL_RECT(dst0, 64, 64);
510  PIXEL_RECT(dst1, 64, 64);
511  LOCAL_ALIGNED_32(int16_t, ref0, [BUF_SIZE]);
512  LOCAL_ALIGNED_32(int16_t, ref1, [BUF_SIZE]);
513 
515  int size, bit_depth, i, j;
516  declare_func(void, uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride,
517  const int16_t *src2,
518  int height, intptr_t mx, intptr_t my, int width);
519 
520  for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
522 
523  for (i = 0; i < 2; i++) {
524  for (j = 0; j < 2; j++) {
525  for (size = 1; size < 10; size++) {
526  const char *type;
527  switch ((j << 1) | i) {
528  case 0: type = "pel_bi_pixels"; break; // 0 0
529  case 1: type = "epel_bi_h"; break; // 0 1
530  case 2: type = "epel_bi_v"; break; // 1 0
531  case 3: type = "epel_bi_hv"; break; // 1 1
532  }
533 
534  if (check_func(h.put_hevc_epel_bi[size][j][i],
535  "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
537  CLEAR_PIXEL_RECT(dst0);
538  CLEAR_PIXEL_RECT(dst1);
539  call_ref(dst0, dst0_stride,
541  ref0, sizes[size], i, j, sizes[size]);
542  call_new(dst1, dst1_stride,
544  ref1, sizes[size], i, j, sizes[size]);
545  checkasm_check_pixel_padded(dst0, dst0_stride,
546  dst1, dst1_stride,
547  size[sizes], size[sizes], "dst");
548  bench_new(dst1, dst1_stride,
550  ref1, sizes[size], i, j, sizes[size]);
551  }
552  }
553  }
554  }
555  }
556  report("epel_bi");
557 }
558 
/*
 * Body of checkasm_check_hevc_epel_bi_w(); the signature line (559 in the
 * embedded numbering) is not present in this extract.
 *
 * Checks every h.put_hevc_epel_bi_w[size][j][i] entry for bit depths 8..12,
 * i and j each 0 or 1, and size indexing sizes[1..9]. Every combination of
 * denoms[], weights[] and offsets[] is tried (each list is walked until its
 * -1 sentinel); the same weight is passed for both wx0 and wx1 and the same
 * offset for both ox0 and ox1. ref0/ref1 supply the int16_t src2 argument.
 * Outputs are compared with checkasm_check_pixel_padded() and reported as
 * "epel_bi_w".
 *
 * NOTE(review): the embedded numbering has gaps (e.g. 568, 577, 595, 599) and
 * the call argument lists show no source pointer or stride, so lines appear
 * to be missing - confirm upstream.
 */
560 {
561  LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
562  LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
563  PIXEL_RECT(dst0, 64, 64);
564  PIXEL_RECT(dst1, 64, 64);
565  LOCAL_ALIGNED_32(int16_t, ref0, [BUF_SIZE]);
566  LOCAL_ALIGNED_32(int16_t, ref1, [BUF_SIZE]);
567 
569  int size, bit_depth, i, j;
570  const int *denom, *wx, *ox;
571  declare_func(void, uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride,
572  const int16_t *src2,
573  int height, int denom, int wx0, int wx1,
574  int ox0, int ox1, intptr_t mx, intptr_t my, int width);
575 
576  for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
578 
579  for (i = 0; i < 2; i++) {
580  for (j = 0; j < 2; j++) {
581  for (size = 1; size < 10; size++) {
582  const char *type;
583  switch ((j << 1) | i) {
584  case 0: type = "pel_bi_w_pixels"; break; // 0 0
585  case 1: type = "epel_bi_w_h"; break; // 0 1
586  case 2: type = "epel_bi_w_v"; break; // 1 0
587  case 3: type = "epel_bi_w_hv"; break; // 1 1
588  }
589 
590  if (check_func(h.put_hevc_epel_bi_w[size][j][i],
591  "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
592  for (denom = denoms; *denom >= 0; denom++) {
593  for (wx = weights; *wx >= 0; wx++) {
594  for (ox = offsets; *ox >= 0; ox++) {
596  CLEAR_PIXEL_RECT(dst0);
597  CLEAR_PIXEL_RECT(dst1);
598  call_ref(dst0, dst0_stride,
600  ref0, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
601  call_new(dst1, dst1_stride,
603  ref1, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
604  checkasm_check_pixel_padded(dst0, dst0_stride,
605  dst1, dst1_stride,
606  size[sizes], size[sizes], "dst");
607  bench_new(dst1, dst1_stride,
609  ref1, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
610  }
611  }
612  }
613  }
614  }
615  }
616  }
617  }
618  report("epel_bi_w");
619 }
620 
/*
 * Braces of checkasm_check_hevc_pel(). NOTE(review): the signature line (621
 * in the embedded numbering) and the whole body (623-632) are absent from
 * this extract - restore them from the upstream file.
 */
622 {
633 }
ff_hevc_dsp_init
void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth)
Definition: dsp.c:128
checkasm_check_hevc_qpel_bi
static void checkasm_check_hevc_qpel_bi(void)
Definition: hevc_pel.c:232
checkasm_check_hevc_epel_bi_w
static void checkasm_check_hevc_epel_bi_w(void)
Definition: hevc_pel.c:559
checkasm_check_hevc_epel_uni
static void checkasm_check_hevc_epel_uni(void)
Definition: hevc_pel.c:396
src0
#define src0
Definition: hevc_pel.c:72
pixel_mask16
static const uint32_t pixel_mask16[]
Definition: hevc_pel.c:30
check_func
#define check_func(func,...)
Definition: checkasm.h:185
checkasm_check_hevc_epel
static void checkasm_check_hevc_epel(void)
Definition: hevc_pel.c:351
checkasm_check_pixel_padded
#define checkasm_check_pixel_padded(...)
Definition: checkasm.h:439
call_ref
#define call_ref(...)
Definition: checkasm.h:200
bit_depth
static void bit_depth(AudioStatsContext *s, const uint64_t *const mask, uint8_t *depth)
Definition: af_astats.c:246
mx
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t mx
Definition: dsp.h:53
checkasm.h
type
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf type
Definition: writing_filters.txt:86
checkasm_check_hevc_qpel
static void checkasm_check_hevc_qpel(void)
Definition: hevc_pel.c:78
SRC_EXTRA
#define SRC_EXTRA
Definition: hevc_pel.c:349
checkasm_check_hevc_pel
void checkasm_check_hevc_pel(void)
Definition: hevc_pel.c:621
intreadwrite.h
offsets
static const int offsets[]
Definition: hevc_pel.c:34
SIZEOF_PIXEL
#define SIZEOF_PIXEL
Definition: hevc_pel.c:36
my
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t my
Definition: dsp.h:53
srcstride
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t const uint8_t ptrdiff_t srcstride
Definition: dsp.h:84
checkasm_check_hevc_qpel_uni_w
static void checkasm_check_hevc_qpel_uni_w(void)
Definition: hevc_pel.c:174
call_new
#define call_new(...)
Definition: checkasm.h:303
LOCAL_ALIGNED_32
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:132
CLEAR_PIXEL_RECT
#define CLEAR_PIXEL_RECT(name)
Definition: checkasm.h:393
dsp.h
height
#define height
Definition: dsp.h:85
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:83
size
int size
Definition: twinvq_data.h:10344
HEVCDSPContext
Definition: dsp.h:47
report
#define report
Definition: checkasm.h:197
bench_new
#define bench_new(...)
Definition: checkasm.h:374
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
internal.h
src2
const pixel * src2
Definition: h264pred_template.c:421
weights
static const int weights[]
Definition: hevc_pel.c:32
common.h
checkasm_check_hevc_epel_bi
static void checkasm_check_hevc_epel_bi(void)
Definition: hevc_pel.c:505
checkasm_check_hevc_qpel_uni
static void checkasm_check_hevc_qpel_uni(void)
Definition: hevc_pel.c:123
MAX_PB_SIZE
#define MAX_PB_SIZE
Definition: dsp.h:32
randomize_buffers_ref
#define randomize_buffers_ref()
Definition: hevc_pel.c:60
pixel_mask
static const uint32_t pixel_mask[]
Definition: hevc_pel.c:29
denoms
static const int denoms[]
Definition: hevc_pel.c:33
randomize_buffers_dst
#define randomize_buffers_dst()
Definition: hevc_pel.c:50
PIXEL_RECT
#define PIXEL_RECT(name, w, h)
Definition: checkasm.h:385
declare_func
#define declare_func(ret,...)
Definition: checkasm.h:189
BUF_SIZE
#define BUF_SIZE
Definition: hevc_pel.c:37
sizes
static const int sizes[]
Definition: hevc_pel.c:31
checkasm_check_hevc_qpel_bi_w
static void checkasm_check_hevc_qpel_bi_w(void)
Definition: hevc_pel.c:286
src1
#define src1
Definition: hevc_pel.c:73
h
h
Definition: vp9dsp_template.c:2070
checkasm_check
#define checkasm_check(prefix,...)
Definition: checkasm.h:414
width
#define width
Definition: dsp.h:85
checkasm_check_hevc_epel_uni_w
static void checkasm_check_hevc_epel_uni_w(void)
Definition: hevc_pel.c:447
src
#define src
Definition: vp8dsp.c:248
randomize_buffers
#define randomize_buffers()
Definition: hevc_pel.c:39