FFmpeg
uops_tmpl.h
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2026 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #ifndef SWSCALE_UOPS_TMPL_H
22 #define SWSCALE_UOPS_TMPL_H
23 
24 /**
25  * Helper macros for the C-based backend.
26  *
27  * To use these macros, `pixel_t` should be defined as the type of pixels.
28  */
29 
30 #include <assert.h>
31 #include <float.h>
32 #include <stdint.h>
33 
34 #include "libavutil/attributes.h"
35 
36 #include "ops_chain.h"
37 #include "uops_macros.h"
38 
/**
 * Number of elements in one block. May be overridden by defining
 * SWS_BLOCK_SIZE before this header is included.
 */
#ifndef SWS_BLOCK_SIZE
#  define SWS_BLOCK_SIZE 32
#endif

/**
 * Storage for one block of SWS_BLOCK_SIZE elements, viewable through any of
 * the element types listed below.
 */
typedef union block_t {
    uint8_t  u8[SWS_BLOCK_SIZE];
    uint16_t u16[SWS_BLOCK_SIZE];
    uint32_t u32[SWS_BLOCK_SIZE];
    float    f32[SWS_BLOCK_SIZE];
} block_t;
49 
/*
 * Size in bytes of one block of SWS_BLOCK_SIZE elements of type pixel_t.
 * SWS_BLOCK_SIZE is parenthesized because it may be overridden from the
 * outside with an arbitrary constant expression.
 */
#define SIZEOF_BLOCK (sizeof(pixel_t) * (SWS_BLOCK_SIZE))
51 
52 /**
53  * Internal context holding per-iter execution data. The data pointers will be
54  * directly incremented by the corresponding read/write functions.
55  */
56 typedef struct SwsOpIter {
57  uintptr_t in[4];
58  uintptr_t out[4];
59  int x, y;
60 
61  /* Link back to per-slice execution context */
62  const SwsOpExec *exec;
63 } SwsOpIter;
64 
/*
 * Loop annotation: expands to a compiler-specific loop pragma
 * (clang: `loop vectorize(assume_safety)`, GCC: `ivdep`) and to nothing on
 * any other compiler.
 */
#if defined(__clang__)
#  define SWS_LOOP AV_PRAGMA(clang loop vectorize(assume_safety))
#elif defined(__GNUC__)
#  define SWS_LOOP AV_PRAGMA(GCC ivdep)
#else
#  define SWS_LOOP
#endif
72 
/* Miscellaneous helpers */

/* Paste `base` and `suffix` together as base_suffix, without expanding them */
#define bitfn2(base, suffix) base ## _ ## suffix
/* Same as bitfn2(), but both arguments are macro-expanded before pasting */
#define bitfn(base, suffix) bitfn2(base, suffix)
/* Append the expansion of PX (defined by the including file) to `base` */
#define fn(base) bitfn(base, PX)
77 
/* Offset `base` by `nbytes` bytes (not elements) and yield a pixel_t pointer */
#define bump_ptr(base, nbytes) \
    ((pixel_t *) ((uintptr_t) (base) + (nbytes)))
79 
/*
 * Component mask helpers. Each one tests a single component index
 * (X = 0, Y = 1, Z = 2, W = 3) against the variable named `mask` that is in
 * scope at the point of use.
 */
#define X  SWS_COMP_TEST(mask, 0)
#define Y  SWS_COMP_TEST(mask, 1)
#define Z  SWS_COMP_TEST(mask, 2)
#define W  SWS_COMP_TEST(mask, 3)
85 
/*
 * Signature helpers.
 *
 * DECL_FUNC(FUNC, extra...) opens the definition of the always-inlined
 * function fn(FUNC). It takes the iteration state, the current SwsOpImpl and
 * the four pointers x/y/z/w, followed by the caller-supplied extra
 * parameters.
 */
#define DECL_FUNC(FUNC, ...)                                                   \
    static av_always_inline void                                               \
    fn(FUNC)(SwsOpIter *restrict iter, const SwsOpImpl *restrict impl,         \
             pixel_t *restrict x, pixel_t *restrict y,                         \
             pixel_t *restrict z, pixel_t *restrict w,                         \
             __VA_ARGS__)
93 
/*
 * Like DECL_FUNC(), but appends four read-only source pointers in0..in3
 * after the caller-supplied extra parameters.
 *
 * The last line deliberately carries no line continuation, so the line that
 * follows the macro can never be spliced into its replacement list.
 */
#define DECL_READ(NAME, ...)                                                   \
    DECL_FUNC(NAME, __VA_ARGS__,                                               \
              const pixel_t *restrict in0, const pixel_t *restrict in1,        \
              const pixel_t *restrict in2, const pixel_t *restrict in3)
98 
/*
 * Like DECL_FUNC(), but appends four writable destination pointers
 * out0..out3 after the caller-supplied extra parameters.
 *
 * The last line deliberately carries no line continuation, so the line that
 * follows the macro can never be spliced into its replacement list.
 */
#define DECL_WRITE(NAME, ...)                                                  \
    DECL_FUNC(NAME, __VA_ARGS__,                                               \
              pixel_t *restrict out0, pixel_t *restrict out1,                  \
              pixel_t *restrict out2, pixel_t *restrict out3)
103 
/*
 * Invoke fn(FUNC), forwarding the `iter`, `impl`, `x`, `y`, `z` and `w`
 * variables of the enclosing scope ahead of the extra arguments.
 */
#define CALL(FUNC, ...) \
    fn(FUNC)(iter, impl, x, y, z, w, __VA_ARGS__)
105 
/*
 * Tail into the next continuation: `impl->cont` is cast to the kernel entry
 * point signature and called with `iter`, the SwsOpImpl following `impl`,
 * and the given arguments.
 */
#define CONTINUE(...)                                                          \
    ((void (*)(SwsOpIter *, const SwsOpImpl *,                                 \
               void *restrict x, void *restrict y,                             \
               void *restrict z, void *restrict w)) impl->cont)                \
        (iter, impl + 1, __VA_ARGS__)
112 
/*
 * Opens the definition of the setup function fn(NAME). `PARAMS` and `OUT`
 * become the names of its two parameters: the read-only SwsImplParams and
 * the SwsImplResult pointer. The function is marked av_unused.
 */
#define DECL_SETUP(NAME, PARAMS, OUT)                                          \
    static av_unused int fn(NAME)(const SwsImplParams *PARAMS,                 \
                                  SwsImplResult *OUT)
117 
/*
 * Defines the kernel entry point NAME_c. The entry point takes the
 * type-erased x/y/z/w pointers and forwards them, together with any extra
 * arguments, to fn(FUNC) through CALL().
 *
 * TYPE and UOP are accepted but not used by this expansion.
 */
#define DECL_IMPL(FUNC, NAME, TYPE, UOP, ...)                                  \
    static av_flatten void NAME##_c(SwsOpIter *restrict iter,                  \
                                    const SwsOpImpl *restrict impl,            \
                                    void *restrict x, void *restrict y,        \
                                    void *restrict z, void *restrict w)        \
    {                                                                          \
        CALL(FUNC, __VA_ARGS__);                                               \
    }
127 
/*
 * DECL_IMPL() variant that appends the four iter->in[] addresses, cast to
 * read-only pixel_t pointers, to the argument list.
 */
#define DECL_IMPL_READ(...)                                                    \
    DECL_IMPL(__VA_ARGS__,                                                     \
              (const pixel_t *) iter->in[0],                                   \
              (const pixel_t *) iter->in[1],                                   \
              (const pixel_t *) iter->in[2],                                   \
              (const pixel_t *) iter->in[3])
132 
/*
 * DECL_IMPL() variant that appends the four iter->out[] addresses, cast to
 * writable pixel_t pointers, to the argument list.
 */
#define DECL_IMPL_WRITE(...)                                                   \
    DECL_IMPL(__VA_ARGS__,                                                     \
              (pixel_t *) iter->out[0],                                        \
              (pixel_t *) iter->out[1],                                        \
              (pixel_t *) iter->out[2],                                        \
              (pixel_t *) iter->out[3])
137 
/*
 * Expands to `&op_NAME,` for use as one element of an initializer list.
 * The first and any trailing arguments are ignored.
 */
#define REF_ENTRY(IGNORED, NAME, ...) &op_##NAME,
/*
 * Defines the static table entry op_NAME. Its `.func` is the NAME_c kernel
 * entry point, followed by the caller-supplied initializers and finally
 * SETUP.
 */
#define DECL_ENTRY(SETUP, NAME, ...)                                           \
    static const SwsOpEntry op_##NAME = {                                      \
        .func = (SwsFuncPtr) NAME##_c,                                         \
        __VA_ARGS__,                                                           \
        SETUP                                                                  \
    };
145 
146 #endif
SwsOpIter::exec
const SwsOpExec * exec
Definition: uops_tmpl.h:62
block_t::f32
float f32[SWS_BLOCK_SIZE]
Definition: uops_tmpl.h:47
float.h
SwsOpIter
Internal context holding per-iter execution data.
Definition: uops_tmpl.h:56
SwsOpIter::x
int x
Definition: uops_tmpl.h:59
ops_chain.h
uops_macros.h
SwsOpIter::out
uintptr_t out[4]
Definition: uops_tmpl.h:58
SwsOpExec
Copyright (C) 2026 Niklas Haas.
Definition: ops_dispatch.h:35
block_t
Definition: uops_tmpl.h:43
attributes.h
block_t::u8
uint8_t u8[SWS_BLOCK_SIZE]
Definition: uops_tmpl.h:44
SWS_BLOCK_SIZE
#define SWS_BLOCK_SIZE
Copyright (C) 2026 Niklas Haas.
Definition: uops_tmpl.h:40
SwsOpIter::in
uintptr_t in[4]
Definition: uops_tmpl.h:57
block_t::u32
uint32_t u32[SWS_BLOCK_SIZE]
Definition: uops_tmpl.h:46
block_t::u16
uint16_t u16[SWS_BLOCK_SIZE]
Definition: uops_tmpl.h:45
SwsOpIter::y
int y
Definition: uops_tmpl.h:59