FFmpeg
ops_chain.h
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2025 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #ifndef SWSCALE_OPS_CHAIN_H
22 #define SWSCALE_OPS_CHAIN_H
23 
24 #include "libavutil/cpu.h"
25 
26 #include "ops_internal.h"
27 
28 /** @file
29  * Helpers for SIMD implementations based on chained kernels, using a
30  * continuation passing style to link them together.
31  *
32  * The basic idea here is to "link" together a series of different operation
33  * kernels by constructing a list of kernel addresses into an SwsOpChain. Each
34  * kernel will load the address of the next kernel (the "continuation") from
35  * this struct, and jump directly into it; using an internal function signature
36  * that is an implementation detail of the specific backend.
37  */
38 
39 /**
40  * Private data for each kernel.
41  */
42 typedef union SwsOpPriv {
43  DECLARE_ALIGNED_16(char, data)[16];
44 
45  /* Common types */
46  void *ptr;
47  uint8_t u8[16];
48  uint16_t u16[8];
49  uint32_t u32[4];
50  float f32[4];
51 } SwsOpPriv;
52 
53 static_assert(sizeof(SwsOpPriv) == 16, "SwsOpPriv size mismatch");
54 
55 /* Setup helpers */
56 int ff_sws_setup_u(const SwsOp *op, SwsOpPriv *out);
57 int ff_sws_setup_u8(const SwsOp *op, SwsOpPriv *out);
58 int ff_sws_setup_q(const SwsOp *op, SwsOpPriv *out);
59 int ff_sws_setup_q4(const SwsOp *op, SwsOpPriv *out);
60 
61 /**
62  * Per-kernel execution context.
63  *
64  * Note: This struct is hard-coded in assembly, so do not change the layout.
65  */
66 typedef void (*SwsFuncPtr)(void);
67 typedef struct SwsOpImpl {
68  SwsFuncPtr cont; /* [offset = 0] Continuation for this operation. */
69  SwsOpPriv priv; /* [offset = 16] Private data for this operation. */
70 } SwsOpImpl;
71 
72 static_assert(sizeof(SwsOpImpl) == 32, "SwsOpImpl layout mismatch");
73 static_assert(offsetof(SwsOpImpl, priv) == 16, "SwsOpImpl layout mismatch");
74 
75 /**
76  * Compiled "chain" of operations, which can be dispatched efficiently.
77  * Effectively just a list of function pointers, alongside a small amount of
78  * private data for each operation.
79  */
80 typedef struct SwsOpChain {
81 #define SWS_MAX_OPS 16
82  SwsOpImpl impl[SWS_MAX_OPS + 1]; /* reserve extra space for the entrypoint */
83  void (*free[SWS_MAX_OPS + 1])(void *);
84  int num_impl;
85  int cpu_flags; /* set of all used CPU flags */
86 } SwsOpChain;
87 
/*
 * Chain destruction helpers.
 *
 * ff_sws_op_chain_free_cb() receives the chain as an untyped pointer;
 * ff_sws_op_chain_free() takes a typed SwsOpChain pointer.
 *
 * NOTE(review): the embedded listing numbers skip 92, i.e. the statement
 * inside ff_sws_op_chain_free() is not visible in this extract and the body
 * appears empty here. Presumably it forwards `chain` to
 * ff_sws_op_chain_free_cb() -- confirm against the real header before
 * relying on this text. Listing line 88 (just above) is missing as well.
 */
89 void ff_sws_op_chain_free_cb(void *chain);
90 static inline void ff_sws_op_chain_free(SwsOpChain *chain)
91 {
93 }
94 
95 /* Returns 0 on success, or a negative error code. */
97  void (*free)(void *), const SwsOpPriv *priv);
98 
99 typedef struct SwsOpEntry {
100  /* Kernel metadata; reduced size subset of SwsOp */
103  bool flexible; /* if true, only the type and op are matched */
104  bool unused[4]; /* for kernels which operate on a subset of components */
105 
106  union { /* extra data defining the operation, unless `flexible` is true */
111  uint32_t linear_mask; /* subset of SwsLinearOp */
112  int dither_size; /* subset of SwsDitherOp */
113  int clear_value; /* clear value for integer clears */
114  };
115 
116  /* Kernel implementation */
118  int (*setup)(const SwsOp *op, SwsOpPriv *out); /* optional */
119  void (*free)(void *priv);
120 } SwsOpEntry;
121 
122 typedef struct SwsOpTable {
123  unsigned cpu_flags; /* required CPU flags for this table */
124  int block_size; /* fixed block size of this table */
125  const SwsOpEntry *entries[]; /* terminated by NULL */
126 } SwsOpTable;
127 
128 /**
129  * "Compile" a single op by looking it up in a list of fixed size op tables.
130  * See `op_match` in `ops.c` for details on how the matching works.
131  *
132  * Returns 0, AVERROR(EAGAIN), or a negative error code.
133  */
134 int ff_sws_op_compile_tables(const SwsOpTable *const tables[], int num_tables,
135  SwsOpList *ops, const int block_size,
136  SwsOpChain *chain);
137 
138 #endif
SwsOpTable
Definition: ops_chain.h:122
func
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:68
SWS_MAX_OPS
#define SWS_MAX_OPS
Definition: ops_chain.h:81
out
FILE * out
Definition: movenc.c:55
SwsOpImpl::cont
SwsFuncPtr cont
Definition: ops_chain.h:68
ff_sws_op_chain_alloc
SwsOpChain * ff_sws_op_chain_alloc(void)
Definition: ops_chain.c:29
data
const char data[16]
Definition: mxf.c:149
SwsOpEntry::type
SwsPixelType type
Definition: ops_chain.h:102
SwsOpEntry::op
SwsOpType op
Definition: ops_chain.h:101
SwsOpChain::cpu_flags
int cpu_flags
Definition: ops_chain.h:85
SwsOpEntry::setup
int(* setup)(const SwsOp *op, SwsOpPriv *out)
Definition: ops_chain.h:118
SwsPixelType
SwsPixelType
Copyright (C) 2025 Niklas Haas.
Definition: ops.h:30
SwsOpTable::block_size
int block_size
Definition: ops_chain.h:124
SwsOpPriv::u32
uint32_t u32[4]
Definition: ops_chain.h:49
SwsFuncPtr
void(* SwsFuncPtr)(void)
Per-kernel execution context.
Definition: ops_chain.h:66
ff_sws_setup_u8
int ff_sws_setup_u8(const SwsOp *op, SwsOpPriv *out)
Definition: ops_chain.c:249
ff_sws_op_chain_free_cb
void ff_sws_op_chain_free_cb(void *chain)
Definition: ops_chain.c:34
SwsOpPriv::DECLARE_ALIGNED_16
DECLARE_ALIGNED_16(char, data)[16]
tables
Writing a table generator This documentation is preliminary Parts of the API are not good and should be changed Basic concepts A table generator consists of two *_tablegen c and *_tablegen h The h file will provide the variable declarations and initialization code for the tables
Definition: tablegen.txt:10
SwsReadWriteOp
Definition: ops.h:96
SwsSwizzleOp
Definition: ops.h:114
ff_sws_setup_q
int ff_sws_setup_q(const SwsOp *op, SwsOpPriv *out)
Definition: ops_chain.c:266
SwsOpEntry::swizzle
SwsSwizzleOp swizzle
Definition: ops_chain.h:109
SwsOpEntry::free
void(* free)(void *priv)
Definition: ops_chain.h:119
SwsOpEntry::convert
SwsConvertOp convert
Definition: ops_chain.h:110
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
SwsOpImpl
Definition: ops_chain.h:67
SwsOpChain::impl
SwsOpImpl impl[SWS_MAX_OPS+1]
Definition: ops_chain.h:82
SwsOpTable::entries
const SwsOpEntry * entries[]
Definition: ops_chain.h:125
SwsOpPriv::f32
float f32[4]
Definition: ops_chain.h:50
SwsOpEntry::dither_size
int dither_size
Definition: ops_chain.h:112
SwsOpPriv::ptr
void * ptr
Definition: ops_chain.h:46
SwsOpChain
Compiled "chain" of operations, which can be dispatched efficiently.
Definition: ops_chain.h:80
SwsOpEntry::flexible
bool flexible
Definition: ops_chain.h:103
SwsOpEntry::clear_value
int clear_value
Definition: ops_chain.h:113
SwsOpType
SwsOpType
Definition: ops.h:44
SwsOpEntry::func
SwsFuncPtr func
Definition: ops_chain.h:117
cpu.h
SwsOpPriv::u8
uint8_t u8[16]
Definition: ops_chain.h:47
ff_sws_op_compile_tables
int ff_sws_op_compile_tables(const SwsOpTable *const tables[], int num_tables, SwsOpList *ops, const int block_size, SwsOpChain *chain)
"Compile" a single op by looking it up in a list of fixed size op tables.
Definition: ops_chain.c:196
SwsOpChain::num_impl
int num_impl
Definition: ops_chain.h:84
SwsOpEntry
Definition: ops_chain.h:99
SwsOpPriv::u16
uint16_t u16[8]
Definition: ops_chain.h:48
SwsOpChain::free
void(* free[SWS_MAX_OPS+1])(void *)
Definition: ops_chain.h:83
ff_sws_op_chain_free
static void ff_sws_op_chain_free(SwsOpChain *chain)
Definition: ops_chain.h:90
SwsOpTable::cpu_flags
unsigned cpu_flags
Definition: ops_chain.h:123
SwsPackOp
Definition: ops.h:110
ops_internal.h
ff_sws_setup_u
int ff_sws_setup_u(const SwsOp *op, SwsOpPriv *out)
Definition: ops_chain.c:255
SwsOp
Definition: ops.h:179
ff_sws_op_chain_append
int ff_sws_op_chain_append(SwsOpChain *chain, SwsFuncPtr func, void(*free)(void *), const SwsOpPriv *priv)
Definition: ops_chain.c:48
SwsOpEntry::rw
SwsReadWriteOp rw
Definition: ops_chain.h:107
SwsOpEntry::unused
bool unused[4]
Definition: ops_chain.h:104
SwsOpImpl::priv
SwsOpPriv priv
Definition: ops_chain.h:69
SwsConvertOp
Definition: ops.h:128
ff_sws_setup_q4
int ff_sws_setup_q4(const SwsOp *op, SwsOpPriv *out)
Definition: ops_chain.c:279
SwsOpEntry::pack
SwsPackOp pack
Definition: ops_chain.h:108
SwsOpList
Helper struct for representing a list of operations.
Definition: ops.h:209
SwsOpPriv
Copyright (C) 2025 Niklas Haas.
Definition: ops_chain.h:42
SwsOpEntry::linear_mask
uint32_t linear_mask
Definition: ops_chain.h:111