FFmpeg
uops.h
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2025 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #ifndef SWSCALE_UOPS_H
22 #define SWSCALE_UOPS_H
23 
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stdint.h>
27 
28 /***************************************************************************
29  * Note: This header must be usable at build time, to generate asm sources *
30  ***************************************************************************/
31 
32 #include "libavutil/attributes.h"
33 
/* Forward declarations; this header only refers to these types via pointers. */
typedef struct SwsContext SwsContext;
typedef struct SwsFilterWeights SwsFilterWeights;
typedef struct SwsOpList SwsOpList;
37 
/**
 * Storage type of a pixel component.
 *
 * NOTE(review): the enumerator names and their order come from the
 * cross-reference data for this header; whether the original spelled out
 * explicit initializers is not known. SWS_PIXEL_NONE is the first
 * enumerator, so it is 0 either way.
 */
typedef enum SwsPixelType {
    SWS_PIXEL_NONE = 0,
    SWS_PIXEL_U8,
    SWS_PIXEL_U16,
    SWS_PIXEL_U32,
    SWS_PIXEL_F32,
    SWS_PIXEL_TYPE_NB /* one past the last pixel type */
} SwsPixelType;
46 
50 
/**
 * A single pixel component value, accessible either as one of the supported
 * storage types or as its raw bytes.
 */
typedef union SwsPixel {
    char data[4]; /* raw bytes; deliberately the first member, see below */

    uint8_t  u8;
    uint16_t u16;
    uint32_t u32;
    float    f32;
} SwsPixel;

/*
 * A `{0}` initializer only initializes the first member of a union (`data`).
 * Requiring the union to be exactly as large as `data` therefore ensures
 * that (SwsPixel) {0} is properly initialized to all zeros.
 */
static_assert(sizeof(SwsPixel) == sizeof(char[4]), "SwsPixel size mismatch");
62 
/**
 * Bit-mask of components. Exact meaning depends on the usage context.
 * Bit X of the mask corresponds to component X (0..3).
 */
typedef uint8_t SwsCompMask;
enum {
    SWS_COMP_NONE = 0,
    SWS_COMP_ALL  = 0xF,
/* Mask with only component X set */
#define SWS_COMP(X) (1 << (X))
/* 1 if component X is set in mask, 0 otherwise */
#define SWS_COMP_TEST(mask, X) (!!((mask) & SWS_COMP(X)))
/* Complement of mask within the four component bits */
#define SWS_COMP_INV(mask) ((mask) ^ SWS_COMP_ALL)
/* Mask with the first N components set */
#define SWS_COMP_ELEMS(N) ((1 << (N)) - 1)
/* Build a mask from four truth values, one per component */
#define SWS_COMP_MASK(X, Y, Z, W)   \
    (((X) ? SWS_COMP(0) : 0) |      \
     ((Y) ? SWS_COMP(1) : 0) |      \
     ((Z) ? SWS_COMP(2) : 0) |      \
     ((W) ? SWS_COMP(3) : 0))
};
80 
/* Bit-wise combination of SwsUOpFlagBits */
typedef uint32_t SwsUOpFlags;
typedef enum SwsUOpFlagBits {
    SWS_UOP_FLAG_NONE = 0,
    SWS_UOP_FLAG_FMA  = (1 << 0), /* platform supports FMA ops */
    SWS_UOP_FLAG_MOVE = (1 << 1), /* platform supports SWS_UOP_MOVE */
} SwsUOpFlagBits;
87 
/**
 * Kind of micro-op. The per-group comments describe how the uop's component
 * mask is interpreted for that group.
 */
typedef enum SwsUOpType {
    SWS_UOP_INVALID = 0,

    /* Read/write uops; mask = components to read/write */
    SWS_UOP_READ_PLANAR,        /* simple planar byte-aligned read */
    SWS_UOP_READ_PLANAR_FH,     /* planar read with horizontal filter */
    SWS_UOP_READ_PLANAR_FV,     /* planar read with vertical filter */
    SWS_UOP_READ_PLANAR_FV_FMA, /* NOTE(review): presumably the FV read with SWS_UOP_FLAG_FMA - confirm */
    SWS_UOP_READ_PACKED,        /* simple packed byte-aligned read */
    SWS_UOP_READ_NIBBLE,        /* fractional read (4 bits) from single plane */
    SWS_UOP_READ_BIT,           /* fractional read (1 bit) from single plane */

    SWS_UOP_WRITE_PLANAR,       /* simple planar byte-aligned write */
    SWS_UOP_WRITE_PACKED,       /* simple packed byte-aligned write */
    SWS_UOP_WRITE_NIBBLE,       /* fractional write (4 bits) to single plane */
    SWS_UOP_WRITE_BIT,          /* fractional write (1 bit) to single plane */

    /* Data rearrangement uops; mask = non-trivial and needed components */
    SWS_UOP_PERMUTE,            /* rearrange components (no duplicates) */
    SWS_UOP_COPY,               /* copy/duplicate components */
    SWS_UOP_MOVE,               /* series of register-register assignments */

    /* Data conversion / manipulation uops; mask = affected components */
    SWS_UOP_SWAP_BYTES,         /* swap byte order in components */
    SWS_UOP_EXPAND_BIT,         /* expand low-order bit to all bits in type */
    SWS_UOP_EXPAND_PAIR,        /* expand bytes in pairs (16 bit) */
    SWS_UOP_EXPAND_QUAD,        /* expand bytes in quads (32 bit) */
    SWS_UOP_TO_U8,              /* cast pixel values to SWS_PIXEL_U8 */
    SWS_UOP_TO_U16,             /* cast pixel values to SWS_PIXEL_U16 */
    SWS_UOP_TO_U32,             /* cast pixel values to SWS_PIXEL_U32 */
    SWS_UOP_TO_F32,             /* cast pixel values to SWS_PIXEL_F32 */

    /* Arithmetic uops */
    SWS_UOP_SCALE,              /* multiply masked components by scalar */
    SWS_UOP_ADD,                /* add vec4 to masked components */
    SWS_UOP_MIN,                /* min(x, vec4) on masked components */
    SWS_UOP_MAX,                /* max(x, vec4) on masked components */

    /* Identical to corresponding SwsOpType */
    SWS_UOP_UNPACK,             /* mask = nonzero components in pack pattern */
    SWS_UOP_PACK,               /* mask = nonzero components in pack pattern */
    SWS_UOP_LSHIFT,             /* mask = components to shift */
    SWS_UOP_RSHIFT,             /* mask = components to shift */
    SWS_UOP_CLEAR,              /* mask = components to clear */
    SWS_UOP_LINEAR,             /* mask = non-trivial output rows */
    SWS_UOP_LINEAR_FMA,         /* with SWS_UOP_FLAG_FMA */
    SWS_UOP_DITHER,             /* mask = components to dither */

    /* Platform-specific uops would go here */
    SWS_UOP_TYPE_NB             /* one past the last uop type */
} SwsUOpType;
139 
140 typedef struct SwsFilterUOp {
141  SwsPixelType type; /* pixel type to store result as */
142 } SwsFilterUOp;
143 
/* Parameters of a shift uop */
typedef struct SwsShiftUOp {
    uint8_t amount; /* shift amount; presumably in bits - confirm */
} SwsShiftUOp;
147 
/* Parameters of a component rearrangement */
typedef struct SwsSwizzleUOp {
    uint8_t in[4]; /* input component for each output component */
} SwsSwizzleUOp;
151 
/* Parameters of SWS_UOP_MOVE: a series of register-register assignments */
typedef struct SwsMoveUOp {
    /* The worst case number of moves (for two independent cycles) */
    #define SWS_UOP_MOVE_MAX 6
    int num_moves; /* presumably the number of valid dst/src pairs - confirm */

    /* This may involve a temporary register (index -1) */
    int8_t dst[SWS_UOP_MOVE_MAX]; /* destination register index */
    int8_t src[SWS_UOP_MOVE_MAX]; /* source register index */
} SwsMoveUOp;
161 
/* Parameters of the pack/unpack uops */
typedef struct SwsPackUOp {
    uint8_t pattern[4]; /* bit depth pattern, from MSB to LSB */
} SwsPackUOp;
165 
166 typedef struct SwsClearUOp {
167  SwsCompMask one; /* mask of coefficients equal to all 1s */
168  SwsCompMask zero; /* mask of coefficients equal to all 0s */
169 } SwsClearUOp;
170 
/*
 * Parameters of the linear uops.
 * NOTE(review): the bit-to-coefficient mapping of these masks is not visible
 * in this header; presumably one bit per matrix coefficient - confirm.
 */
typedef struct SwsLinearUOp {
    uint32_t one;   /* mask of coefficients equal to one */
    uint32_t zero;  /* mask of coefficients equal to zero */

    /* for SWS_UOP_LINEAR_FMA only */
    uint32_t exact; /* mask of coefficients whose product is exact */
} SwsLinearUOp;
178 
/* Parameters of a dither uop */
typedef struct SwsDitherUOp {
    uint8_t y_offset[4]; /* per-component line offset into the dither matrix */
    uint8_t size_log2;   /* the matrix spans (1 << size_log2) lines before padding */
} SwsDitherUOp;
183 
184 /**
185  * Computes (1 << size_log2) + MAX(y_offset). The dither matrix attached to
186  * the SwsUOp is always pre-padded to this number of lines.
187  */
189 
190 typedef union SwsUOpParams {
191  SwsFilterUOp filter; /* for SWS_UOP_READ_*_FV/FH */
199 } SwsUOpParams;
200 
201 typedef struct SwsUOp {
202  /* These fields uniquely identify the uop implementation */
207 
208  /* Constant data for this uop; not part of the unique identifier */
209  union {
210  SwsFilterWeights *kernel; /* refstruct */
211  SwsPixel *ptr; /* refstruct */
214  SwsPixel mat4[4][5]; /* row major */
215  void *opaque; /* reserved for internal use */
216  } data;
217 } SwsUOp;
218 
219 /**
220  * Compare two SwsUOps for equality (excluding constant data).
221  */
222 int ff_sws_uop_cmp(const SwsUOp *a, const SwsUOp *b);
223 
/**
 * Variant of ff_sws_uop_cmp() taking untyped pointers; both arguments must
 * point to SwsUOp objects.
 *
 * NOTE(review): presumably intended for generic comparator callbacks that
 * expect `int (*)(const void *, const void *)` - confirm against callers.
 */
static inline int ff_sws_uop_cmp_v(const void *a, const void *b)
{
    return ff_sws_uop_cmp(a, b);
}
228 
229 /**
230  * Generate a unique name for a SwsUOp.
231  */
232 #define SWS_UOP_NAME_MAX 64
233 void ff_sws_uop_name(const SwsUOp *op, char buf[SWS_UOP_NAME_MAX]);
234 
235 typedef struct SwsUOpList {
237  int num_ops;
238 } SwsUOpList;
239 
241 void ff_sws_uop_list_free(SwsUOpList **ops);
242 
243 /* Takes over ownership of `uop` and sets it to {0}, even on failure. */
244 int ff_sws_uop_list_append(SwsUOpList *uops, SwsUOp *uop);
245 
246 /**
247  * Translate a list of operations down to micro-ops, which can be further
248  * optimized and then directly executed by backends.
249  *
250  * Return 0 or a negative error code.
251  */
253  SwsUOpFlags flags, SwsUOpList *uops);
254 
/**
 * Generate a set of boilerplate C preprocessor macros for describing and
 * programmatically iterating over all possible SwsUOps.
 *
 * This function can be quite slow as it iterates over every possible
 * combination of pixel formats and flags.
 *
 * @param out_str receives the allocated string on success
 *
 * Returns 0 or a negative error code. On success, an allocated string is
 * returned via `out_str`, and must be av_free()'d by the caller.
 */
int ff_sws_uops_macros_gen(char **out_str);
266 
267 #endif
flags
const SwsFlags flags[]
Definition: swscale.c:85
SWS_UOP_SCALE
@ SWS_UOP_SCALE
Definition: uops.h:121
SwsUOpParams::move
SwsMoveUOp move
Definition: uops.h:194
ff_sws_uops_macros_gen
int ff_sws_uops_macros_gen(char **out_str)
Generate a set of boilerplate C preprocessor macros for describing and programmatically iterating ove...
Definition: uops.c:966
ff_sws_uop_cmp_v
static int ff_sws_uop_cmp_v(const void *a, const void *b)
Definition: uops.h:224
SWS_UOP_RSHIFT
@ SWS_UOP_RSHIFT
Definition: uops.h:130
SWS_PIXEL_NONE
@ SWS_PIXEL_NONE
Definition: uops.h:39
SwsClearUOp::zero
SwsCompMask zero
Definition: uops.h:168
SWS_COMP_NONE
@ SWS_COMP_NONE
Definition: uops.h:68
SwsSwizzleUOp
Definition: uops.h:148
ff_sws_uop_cmp
int ff_sws_uop_cmp(const SwsUOp *a, const SwsUOp *b)
Compare two SwsUOps for equality (excluding constant data).
Definition: uops.c:32
SwsFilterWeights
Represents a computed filter kernel.
Definition: filters.h:64
ff_sws_uop_list_free
void ff_sws_uop_list_free(SwsUOpList **ops)
Definition: uops.c:368
b
#define b
Definition: input.c:43
SWS_UOP_MOVE_MAX
#define SWS_UOP_MOVE_MAX
Definition: uops.h:154
SWS_UOP_LINEAR_FMA
@ SWS_UOP_LINEAR_FMA
Definition: uops.h:133
SWS_UOP_MAX
@ SWS_UOP_MAX
Definition: uops.h:124
ff_sws_pixel_type_name
const char * ff_sws_pixel_type_name(SwsPixelType type)
Definition: ops.c:62
SwsUOpParams::swizzle
SwsSwizzleUOp swizzle
Definition: uops.h:193
SWS_UOP_LSHIFT
@ SWS_UOP_LSHIFT
Definition: uops.h:129
SwsLinearUOp::one
uint32_t one
Definition: uops.h:172
SwsFilterUOp
Definition: uops.h:140
SWS_UOP_TYPE_NB
@ SWS_UOP_TYPE_NB
Definition: uops.h:137
SWS_UOP_NAME_MAX
#define SWS_UOP_NAME_MAX
Generate a unique name for a SwsUOp.
Definition: uops.h:232
SwsMoveUOp::num_moves
int num_moves
Definition: uops.h:155
SwsMoveUOp
Definition: uops.h:152
SWS_UOP_TO_U16
@ SWS_UOP_TO_U16
Definition: uops.h:116
SWS_UOP_PACK
@ SWS_UOP_PACK
Definition: uops.h:128
ff_sws_uop_list_alloc
SwsUOpList * ff_sws_uop_list_alloc(void)
Definition: uops.c:382
ff_sws_pixel_type_size
int ff_sws_pixel_type_size(SwsPixelType type) av_const
Definition: ops.c:77
SwsShiftUOp::amount
uint8_t amount
Definition: uops.h:145
SWS_UOP_PERMUTE
@ SWS_UOP_PERMUTE
Definition: uops.h:106
SwsUOpParams::pack
SwsPackUOp pack
Definition: uops.h:195
SWS_UOP_EXPAND_BIT
@ SWS_UOP_EXPAND_BIT
Definition: uops.h:112
type
it's the only field you need to keep assuming you have a context. There is some magic you don't need to care about around this, just let it vf type
Definition: writing_filters.txt:86
SwsUOpParams
Definition: uops.h:190
SwsFilterUOp::type
SwsPixelType type
Definition: uops.h:141
SWS_UOP_COPY
@ SWS_UOP_COPY
Definition: uops.h:107
SWS_UOP_INVALID
@ SWS_UOP_INVALID
Definition: uops.h:89
ff_sws_pixel_type_is_int
bool ff_sws_pixel_type_is_int(SwsPixelType type) av_const
Definition: ops.c:92
SWS_UOP_WRITE_NIBBLE
@ SWS_UOP_WRITE_NIBBLE
Definition: uops.h:102
av_const
#define av_const
Definition: attributes.h:113
SwsUOp::kernel
SwsFilterWeights * kernel
Definition: uops.h:210
SWS_UOP_MOVE
@ SWS_UOP_MOVE
Definition: uops.h:108
SwsPackUOp
Definition: uops.h:162
dither
static const uint16_t dither[8][8]
Definition: vf_gradfun.c:46
SwsUOp::uop
SwsUOpType uop
Definition: uops.h:204
SWS_UOP_WRITE_PLANAR
@ SWS_UOP_WRITE_PLANAR
Definition: uops.h:100
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
SWS_UOP_TO_F32
@ SWS_UOP_TO_F32
Definition: uops.h:118
SWS_UOP_MIN
@ SWS_UOP_MIN
Definition: uops.h:123
ctx
static AVFormatContext * ctx
Definition: movenc.c:49
SwsCompMask
uint8_t SwsCompMask
Bit-mask of components.
Definition: uops.h:61
SWS_COMP_ALL
@ SWS_COMP_ALL
Definition: uops.h:69
SWS_UOP_READ_PACKED
@ SWS_UOP_READ_PACKED
Definition: uops.h:96
SwsPixel::f32
float f32
Definition: uops.h:57
SwsPixel::u8
uint8_t u8
Definition: uops.h:54
SwsUOp::mat4
SwsPixel mat4[4][5]
Definition: uops.h:214
SWS_PIXEL_TYPE_NB
@ SWS_PIXEL_TYPE_NB
Definition: uops.h:44
SwsUOpParams::shift
SwsShiftUOp shift
Definition: uops.h:192
SwsLinearUOp
Definition: uops.h:171
SwsMoveUOp::dst
int8_t dst[SWS_UOP_MOVE_MAX]
Definition: uops.h:158
SwsClearUOp::one
SwsCompMask one
Definition: uops.h:167
SWS_UOP_FLAG_MOVE
@ SWS_UOP_FLAG_MOVE
Definition: uops.h:85
SwsClearUOp
Definition: uops.h:166
SWS_UOP_READ_NIBBLE
@ SWS_UOP_READ_NIBBLE
Definition: uops.h:97
attributes.h
SWS_UOP_ADD
@ SWS_UOP_ADD
Definition: uops.h:122
SwsShiftUOp
Definition: uops.h:144
SwsPixelType
SwsPixelType
Definition: uops.h:38
SwsUOp::par
SwsUOpParams par
Definition: uops.h:206
SWS_UOP_TO_U32
@ SWS_UOP_TO_U32
Definition: uops.h:117
SwsUOp::data
union SwsUOp::@586 data
SwsPixel::u16
uint16_t u16
Definition: uops.h:55
SwsUOp
Definition: uops.h:201
SWS_UOP_WRITE_BIT
@ SWS_UOP_WRITE_BIT
Definition: uops.h:103
SwsUOp::opaque
void * opaque
Definition: uops.h:215
SWS_UOP_READ_PLANAR_FV_FMA
@ SWS_UOP_READ_PLANAR_FV_FMA
Definition: uops.h:95
SwsLinearUOp::zero
uint32_t zero
Definition: uops.h:173
SwsUOp::mask
SwsCompMask mask
Definition: uops.h:205
SwsDitherUOp::size_log2
uint8_t size_log2
Definition: uops.h:181
SWS_UOP_UNPACK
@ SWS_UOP_UNPACK
Definition: uops.h:127
SWS_PIXEL_U32
@ SWS_PIXEL_U32
Definition: uops.h:42
SWS_UOP_FLAG_NONE
@ SWS_UOP_FLAG_NONE
Definition: uops.h:83
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
SwsPixel
Definition: uops.h:51
SwsUOpFlagBits
SwsUOpFlagBits
Definition: uops.h:82
SWS_UOP_TO_U8
@ SWS_UOP_TO_U8
Definition: uops.h:115
SWS_UOP_READ_PLANAR
@ SWS_UOP_READ_PLANAR
Definition: uops.h:92
SWS_PIXEL_U8
@ SWS_PIXEL_U8
Definition: uops.h:40
SwsUOpType
SwsUOpType
Definition: uops.h:88
SWS_UOP_SWAP_BYTES
@ SWS_UOP_SWAP_BYTES
Definition: uops.h:111
SwsUOp::scalar
SwsPixel scalar
Definition: uops.h:212
SWS_UOP_LINEAR
@ SWS_UOP_LINEAR
Definition: uops.h:132
SwsUOpParams::lin
SwsLinearUOp lin
Definition: uops.h:197
SwsPackUOp::pattern
uint8_t pattern[4]
Definition: uops.h:163
SwsUOp::type
SwsPixelType type
Definition: uops.h:203
SwsUOpList::num_ops
int num_ops
Definition: uops.h:237
ff_sws_uop_name
void ff_sws_uop_name(const SwsUOp *op, char buf[SWS_UOP_NAME_MAX])
Definition: uops.c:129
SwsUOp::ptr
SwsPixel * ptr
Definition: uops.h:211
ff_sws_uop_list_append
int ff_sws_uop_list_append(SwsUOpList *uops, SwsUOp *uop)
Definition: uops.c:387
ff_sws_ops_translate
int ff_sws_ops_translate(SwsContext *ctx, const SwsOpList *ops, SwsUOpFlags flags, SwsUOpList *uops)
Translate a list of operations down to micro-ops, which can be further optimized and then directly ex...
Definition: uops.c:852
SwsLinearUOp::exact
uint32_t exact
Definition: uops.h:176
SwsDitherUOp::y_offset
uint8_t y_offset[4]
Definition: uops.h:180
SwsUOpList
Definition: uops.h:235
SwsUOp::vec4
SwsPixel vec4[4]
Definition: uops.h:213
SWS_UOP_DITHER
@ SWS_UOP_DITHER
Definition: uops.h:134
SWS_UOP_WRITE_PACKED
@ SWS_UOP_WRITE_PACKED
Definition: uops.h:101
SwsDitherUOp
Definition: uops.h:179
SwsUOpParams::dither
SwsDitherUOp dither
Definition: uops.h:198
SWS_PIXEL_F32
@ SWS_PIXEL_F32
Definition: uops.h:43
SWS_UOP_READ_PLANAR_FV
@ SWS_UOP_READ_PLANAR_FV
Definition: uops.h:94
SWS_UOP_EXPAND_QUAD
@ SWS_UOP_EXPAND_QUAD
Definition: uops.h:114
SwsUOpFlags
uint32_t SwsUOpFlags
Definition: uops.h:81
SWS_UOP_READ_PLANAR_FH
@ SWS_UOP_READ_PLANAR_FH
Definition: uops.h:93
SwsMoveUOp::src
int8_t src[SWS_UOP_MOVE_MAX]
Definition: uops.h:159
SwsUOpParams::filter
SwsFilterUOp filter
Definition: uops.h:191
SWS_UOP_FLAG_FMA
@ SWS_UOP_FLAG_FMA
Definition: uops.h:84
SWS_UOP_READ_BIT
@ SWS_UOP_READ_BIT
Definition: uops.h:98
SWS_UOP_CLEAR
@ SWS_UOP_CLEAR
Definition: uops.h:131
SwsOpList
Helper struct for representing a list of operations.
Definition: ops.h:281
SwsContext
Main external API structure.
Definition: swscale.h:229
SWS_PIXEL_U16
@ SWS_PIXEL_U16
Definition: uops.h:41
SwsPixel::u32
uint32_t u32
Definition: uops.h:56
SwsSwizzleUOp::in
uint8_t in[4]
Definition: uops.h:149
SwsUOpParams::clear
SwsClearUOp clear
Definition: uops.h:196
SwsPixel::data
char data[4]
Definition: uops.h:52
SwsUOpList::ops
SwsUOp * ops
Definition: uops.h:236
ff_sws_dither_height
int ff_sws_dither_height(const SwsDitherUOp *dither)
Computes (1 << size_log2) + MAX(y_offset).
Definition: uops.c:400
SWS_UOP_EXPAND_PAIR
@ SWS_UOP_EXPAND_PAIR
Definition: uops.h:113