FFmpeg
uops.c
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2026 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <stdbool.h>
22 
23 #include "libavutil/avassert.h"
24 #include "libavutil/mem.h"
25 #include "libavutil/refstruct.h"
26 #include "libavutil/tree.h"
27 
28 #include "ops.h"
29 #include "ops_internal.h"
30 #include "uops.h"
31 
32 int ff_sws_uop_cmp(const SwsUOp *a, const SwsUOp *b)
33 {
34  if (a->type != b->type)
35  return (int) a->type - b->type;
36  if (a->uop != b->uop)
37  return (int) a->uop - b->uop;
38  if (a->mask != b->mask)
39  return (int) a->mask - b->mask;
40  return memcmp(&a->par, &b->par, sizeof(a->par));
41 }
42 
43 static const struct {
44  char full[32];
45  char abbr[32];
46  char macro[32];
48 #define UOP_NAME(OP, ABBR) [SWS_UOP_##OP] = { "SWS_UOP_" #OP, ABBR, #OP }
49  UOP_NAME(INVALID, "invalid"),
50  UOP_NAME(READ_PLANAR, "read_planar"),
51  UOP_NAME(READ_PLANAR_FH, "read_planar_fh"),
52  UOP_NAME(READ_PLANAR_FV, "read_planar_fv"),
53  UOP_NAME(READ_PLANAR_FV_FMA,"read_planar_fv_fma"),
54  UOP_NAME(READ_PACKED, "read_packed"),
55  UOP_NAME(READ_NIBBLE, "read_nibble"),
56  UOP_NAME(READ_BIT, "read_bit"),
57  UOP_NAME(WRITE_PLANAR, "write_planar"),
58  UOP_NAME(WRITE_PACKED, "write_packed"),
59  UOP_NAME(WRITE_NIBBLE, "write_nibble"),
60  UOP_NAME(WRITE_BIT, "write_bit"),
61  UOP_NAME(PERMUTE, "permute"),
62  UOP_NAME(COPY, "copy"),
63  UOP_NAME(MOVE, "move"),
64  UOP_NAME(SWAP_BYTES, "swap_bytes"),
65  UOP_NAME(EXPAND_BIT, "expand_bit"),
66  UOP_NAME(EXPAND_PAIR, "expand_pair"),
67  UOP_NAME(EXPAND_QUAD, "expand_quad"),
68  UOP_NAME(TO_U8, "to_u8"),
69  UOP_NAME(TO_U16, "to_u16"),
70  UOP_NAME(TO_U32, "to_u32"),
71  UOP_NAME(TO_F32, "to_f32"),
72  UOP_NAME(SCALE, "scale"),
73  UOP_NAME(LINEAR, "linear"),
74  UOP_NAME(LINEAR_FMA, "linear_fma"),
75  UOP_NAME(ADD, "add"),
76  UOP_NAME(MIN, "min"),
77  UOP_NAME(MAX, "max"),
78  UOP_NAME(UNPACK, "unpack"),
79  UOP_NAME(PACK, "pack"),
80  UOP_NAME(LSHIFT, "lshift"),
81  UOP_NAME(RSHIFT, "rshift"),
82  UOP_NAME(CLEAR, "clear"),
83  UOP_NAME(DITHER, "dither"),
84 #undef UOP_NAME
85 };
86 
87 static const struct {
88  char full[16];
89  char prefix[8];
91  [SWS_PIXEL_NONE] = { "SWS_PIXEL_NONE", "" },
92  [SWS_PIXEL_U8] = { "SWS_PIXEL_U8", "U8_" },
93  [SWS_PIXEL_U16] = { "SWS_PIXEL_U16", "U16_" },
94  [SWS_PIXEL_U32] = { "SWS_PIXEL_U32", "U32_" },
95  [SWS_PIXEL_F32] = { "SWS_PIXEL_F32", "F32_" },
96 };
97 
99 {
100  av_assert1(val.den != 0);
101  switch (type) {
102  case SWS_PIXEL_U8: return (SwsPixel) { .u8 = val.num / val.den };
103  case SWS_PIXEL_U16: return (SwsPixel) { .u16 = val.num / val.den };
104  case SWS_PIXEL_U32: return (SwsPixel) { .u32 = val.num / val.den };
105  case SWS_PIXEL_F32: return (SwsPixel) { .f32 = (float) val.num / val.den };
106  case SWS_PIXEL_NONE:
107  case SWS_PIXEL_TYPE_NB: break;
108  }
109 
110  av_unreachable("Invalid pixel type!");
111  return (SwsPixel) {0};
112 }
113 
114 #define Q2PIXEL(val) pixel_from_q(op->type, val)
115 
117 {
118  switch (ff_sws_pixel_type_size(type)) {
119  case 1: return val.u8 == UINT8_MAX;
120  case 2: return val.u16 == UINT16_MAX;
121  case 4: return val.u32 == UINT32_MAX;
122  default: break;
123  }
124 
125  av_unreachable("Invalid pixel type!");
126  return false;
127 }
128 
129 void ff_sws_uop_name(const SwsUOp *op, char buf[SWS_UOP_NAME_MAX])
130 {
131  AVBPrint bp;
133 
134  if (op->type != SWS_PIXEL_NONE)
135  av_bprintf(&bp, "%s_", ff_sws_pixel_type_name(op->type));
136  av_bprintf(&bp, "%s", uop_names[op->uop].abbr);
137 
138  if (op->mask) {
139  av_bprint_chars(&bp, '_', 1);
140  for (int i = 0; i < 4; i++) {
141  if (SWS_COMP_TEST(op->mask, i))
142  av_bprint_chars(&bp, "xyzw"[i], 1);
143  }
144  }
145 
146  const SwsUOpParams *par = &op->par;
147  switch (op->uop) {
151  av_bprintf(&bp, "_%s", ff_sws_pixel_type_name(par->filter.type));
152  break;
153  case SWS_UOP_LSHIFT:
154  case SWS_UOP_RSHIFT:
155  av_bprintf(&bp, "_%u", par->shift.amount);
156  break;
157  case SWS_UOP_PERMUTE:
158  case SWS_UOP_COPY:
159  av_bprint_chars(&bp, '_', 1);
160  for (int i = 0; i < 4; i++) {
161  if (SWS_COMP_TEST(op->mask, i))
162  av_bprint_chars(&bp, "xyzw"[par->swizzle.in[i]], 1);
163  }
164  break;
165  case SWS_UOP_MOVE:
166  av_bprint_chars(&bp, '_', 1);
167  for (int i = 0; i < par->move.num_moves; i++)
168  av_bprint_chars(&bp, "txyzw"[par->move.dst[i] + 1], 1);
169  av_bprint_chars(&bp, '_', 1);
170  for (int i = 0; i < par->move.num_moves; i++)
171  av_bprint_chars(&bp, "txyzw"[par->move.src[i] + 1], 1);
172  break;
173  case SWS_UOP_PACK:
174  case SWS_UOP_UNPACK:
175  av_bprint_chars(&bp, '_', 1);
176  for (int i = 0; i < 4 && par->pack.pattern[i]; i++)
177  av_bprintf(&bp, "%x", par->pack.pattern[i]);
178  break;
179  case SWS_UOP_CLEAR:
180  av_bprint_chars(&bp, '_', 1);
181  for (int i = 0; i < 4; i++) {
182  if (!SWS_COMP_TEST(op->mask, i))
183  continue;
184  else if (SWS_COMP_TEST(par->clear.one, i))
185  av_bprint_chars(&bp, '1', 1);
186  else if (SWS_COMP_TEST(par->clear.zero, i))
187  av_bprint_chars(&bp, '0', 1);
188  else
189  av_bprint_chars(&bp, 'x', 1);
190  }
191  break;
192  case SWS_UOP_LINEAR:
193  case SWS_UOP_LINEAR_FMA:
194  for (int i = 0; i < 4; i++) {
195  if (!SWS_COMP_TEST(op->mask, i))
196  continue;
197  av_bprint_chars(&bp, '_', 1);
198  for (int j = 0; j < 5; j++) {
199  if (par->lin.one & SWS_MASK(i, j))
200  av_bprint_chars(&bp, '1', 1);
201  else if (par->lin.zero & SWS_MASK(i, j))
202  av_bprint_chars(&bp, '0', 1);
203  else if (par->lin.exact & SWS_MASK(i, j))
204  av_bprint_chars(&bp, 'X', 1);
205  else
206  av_bprint_chars(&bp, 'x', 1);
207  }
208  }
209  break;
210  case SWS_UOP_DITHER:
211  for (int i = 0; i < 4; i++) {
212  if (SWS_COMP_TEST(op->mask, i))
213  av_bprintf(&bp, "_%d", par->dither.y_offset[i]);
214  }
215  const unsigned size = 1u << par->dither.size_log2;
216  av_bprintf(&bp, "_%ux%u", size, size);
217  break;
218  }
219 
221 }
222 
223 static int generate_entry_struct(void *opaque, void *key)
224 {
225  const SwsUOp *ref = opaque;
226  const SwsUOp *uop = key;
227  AVBPrint *bp = ref->data.opaque;
228  char name[SWS_UOP_NAME_MAX];
229  ff_sws_uop_name(uop, name);
230  av_bprintf(bp, " \\\n MACRO(__VA_ARGS__, %-40s", name);
231  av_bprintf(bp, ", .type = %-13s, .uop = %-24s, .mask = 0x%x",
232  pixel_types[uop->type].full, uop_names[uop->uop].full, uop->mask);
233 
234  const SwsUOpParams *par = &uop->par;
235  switch (uop->uop) {
239  av_bprintf(bp, ", .par.filter.type = %s", pixel_types[par->filter.type].full);
240  break;
241  case SWS_UOP_LSHIFT:
242  case SWS_UOP_RSHIFT:
243  av_bprintf(bp, ", .par.shift.amount = %u", par->shift.amount);
244  break;
245  case SWS_UOP_PERMUTE:
246  case SWS_UOP_COPY:
247  av_bprintf(bp, ", .par.swizzle.in = {%d, %d, %d, %d}",
248  par->swizzle.in[0], par->swizzle.in[1],
249  par->swizzle.in[2], par->swizzle.in[3]);
250  break;
251  case SWS_UOP_MOVE:
252  av_bprintf(bp, ", .par.move.num_moves = %d", par->move.num_moves);
253  av_bprintf(bp, ", .par.move.dst = {%d, %d, %d, %d, %d, %d}",
254  par->move.dst[0], par->move.dst[1], par->move.dst[2],
255  par->move.dst[3], par->move.dst[4], par->move.dst[5]);
256  av_bprintf(bp, ", .par.move.src = {%d, %d, %d, %d, %d, %d}",
257  par->move.src[0], par->move.src[1], par->move.src[2],
258  par->move.src[3], par->move.src[4], par->move.src[5]);
259  break;
260  case SWS_UOP_PACK:
261  case SWS_UOP_UNPACK:
262  av_bprintf(bp, ", .par.pack.pattern = {%d, %d, %d, %d}",
263  par->pack.pattern[0], par->pack.pattern[1],
264  par->pack.pattern[2], par->pack.pattern[3]);
265  break;
266  case SWS_UOP_CLEAR:
267  av_bprintf(bp, ", .par.clear.one = 0x%x, .par.clear.zero = 0x%x",
268  par->clear.one, par->clear.zero);
269  break;
270  case SWS_UOP_LINEAR:
271  case SWS_UOP_LINEAR_FMA:
272  av_bprintf(bp, ", .par.lin.one = 0x%x, .par.lin.zero = 0x%x",
273  par->lin.one, par->lin.zero);
274  if (uop->uop == SWS_UOP_LINEAR_FMA)
275  av_bprintf(bp, ", .par.lin.exact = 0x%x", par->lin.exact);
276  break;
277  case SWS_UOP_DITHER:
278  av_bprintf(bp, ", .par.dither = { .y_offset = {%u, %u, %u, %u}, .size_log2 = %u }",
279  par->dither.y_offset[0], par->dither.y_offset[1],
280  par->dither.y_offset[2], par->dither.y_offset[3],
281  par->dither.size_log2);
282  break;
283  }
284 
285  av_bprintf(bp, ")");
286  return 0;
287 }
288 
289 static int generate_entry_args(void *opaque, void *key)
290 {
291  const SwsUOp *ref = opaque;
292  const SwsUOp *uop = key;
293  AVBPrint *bp = ref->data.opaque;
294  char name[SWS_UOP_NAME_MAX];
295  ff_sws_uop_name(uop, name);
296  av_bprintf(bp, " \\\n MACRO(__VA_ARGS__, %-40s, %-13s, %-24s, 0x%x",
297  name, pixel_types[uop->type].full, uop_names[uop->uop].full, uop->mask);
298 
299  const SwsUOpParams *par = &uop->par;
300  switch (uop->uop) {
304  av_bprintf(bp, ", %s", pixel_types[par->filter.type].full);
305  break;
306  case SWS_UOP_LSHIFT:
307  case SWS_UOP_RSHIFT:
308  av_bprintf(bp, ", %u", par->shift.amount);
309  break;
310  case SWS_UOP_PERMUTE:
311  case SWS_UOP_COPY:
312  av_bprintf(bp, ", %d, %d, %d, %d",
313  par->swizzle.in[0], par->swizzle.in[1],
314  par->swizzle.in[2], par->swizzle.in[3]);
315  break;
316  case SWS_UOP_MOVE:
317  av_bprintf(bp, ", %d", par->move.num_moves);
318  av_bprintf(bp, ", %d, %d, %d, %d, %d, %d",
319  par->move.dst[0], par->move.dst[1], par->move.dst[2],
320  par->move.dst[3], par->move.dst[4], par->move.dst[5]);
321  av_bprintf(bp, ", %d, %d, %d, %d, %d, %d",
322  par->move.src[0], par->move.src[1], par->move.src[2],
323  par->move.src[3], par->move.src[4], par->move.src[5]);
324  break;
325  case SWS_UOP_PACK:
326  case SWS_UOP_UNPACK:
327  av_bprintf(bp, ", %d, %d, %d, %d",
328  par->pack.pattern[0], par->pack.pattern[1],
329  par->pack.pattern[2], par->pack.pattern[3]);
330  break;
331  case SWS_UOP_CLEAR:
332  av_bprintf(bp, ", 0x%05x, 0x%05x", par->clear.one, par->clear.zero);
333  break;
334  case SWS_UOP_LINEAR:
335  case SWS_UOP_LINEAR_FMA:
336  av_bprintf(bp, ", 0x%05x, 0x%05x", par->lin.one, par->lin.zero);
337  if (uop->uop == SWS_UOP_LINEAR_FMA)
338  av_bprintf(bp, ", 0x%05x", par->lin.exact);
339  break;
340  case SWS_UOP_DITHER:
341  av_bprintf(bp, ", %u, %u, %u, %u, %u",
342  par->dither.y_offset[0], par->dither.y_offset[1],
343  par->dither.y_offset[2], par->dither.y_offset[3],
344  par->dither.size_log2);
345  break;
346  }
347 
348  av_bprintf(bp, ")");
349  return 0;
350 }
351 
352 static void uop_uninit(SwsUOp *uop)
353 {
354  switch (uop->uop) {
355  case SWS_UOP_DITHER:
356  av_refstruct_unref(&uop->data.ptr);
357  break;
362  break;
363  }
364 
365  *uop = (SwsUOp) {0};
366 }
367 
369 {
370  SwsUOpList *ops = *p_ops;
371  if (!ops)
372  return;
373 
374  for (int i = 0; i < ops->num_ops; i++)
375  uop_uninit(&ops->ops[i]);
376 
377  av_freep(&ops->ops);
378  av_free(ops);
379  *p_ops = NULL;
380 }
381 
383 {
384  return av_mallocz(sizeof(SwsUOpList));
385 }
386 
388 {
389  if (!av_dynarray2_add((void **) &uops->ops, &uops->num_ops,
390  sizeof(*uop), (uint8_t *) uop))
391  {
392  uop_uninit(uop);
393  return AVERROR(ENOMEM);
394  }
395 
396  *uop = (SwsUOp) {0};
397  return 0;
398 }
399 
401 {
402  int max_offset = 0;
403  for (int i = 0; i < 4; i++)
404  max_offset = FFMAX(max_offset, dither->y_offset[i]);
405  return (1 << dither->size_log2) + max_offset;
406 }
407 
409 {
410  switch (ff_sws_pixel_type_size(type)) {
411  case 1: return SWS_PIXEL_U8;
412  case 2: return SWS_PIXEL_U16;
413  case 4: return SWS_PIXEL_U32;
414  default: break;
415  }
416 
417  av_unreachable("Invalid pixel type!");
418  return SWS_PIXEL_NONE;
419 }
420 
/**
 * Round-trip check for the float product `a * b`: the product is formed,
 * divided by `b` again, and the result compared against `a`.
 *
 * A zero `b` is always reported as exact. The intermediates are volatile,
 * presumably to stop the compiler from folding the round trip away or
 * evaluating it in excess precision.
 *
 * @return true if `b` is zero or `(a * b) / b` compares equal to `a`
 */
static bool exact_product_f32(float a, float b)
{
    volatile float product = a * b;
    if (b == 0.0f)
        return true;

    volatile float roundtrip = product / b;
    return roundtrip == a;
}
427 
429  const SwsComps *comps, int idx)
430 {
431  const AVRational minq = comps->min[idx];
432  const AVRational maxq = comps->max[idx];
434  return true;
435  else if (!minq.den || !maxq.den)
436  return false; /* unknown bounds */
437 
438  const SwsPixel min = pixel_from_q(type, minq);
439  const SwsPixel max = pixel_from_q(type, maxq);
440  switch (type) {
441  case SWS_PIXEL_F32:
442  return exact_product_f32(coef.f32, min.f32) &&
443  exact_product_f32(coef.f32, max.f32);
444  }
445 
446  av_unreachable("Invalid pixel type!");
447  return false;
448 }
449 
451 {
452  if (!(flags & SWS_UOP_FLAG_FMA))
453  return false;
454  if (!(ctx->flags & SWS_BITEXACT))
455  return true;
456  if (!ff_sws_pixel_type_is_int(op->type))
457  return false;
458 
459  const int bits = ff_sws_pixel_type_size(op->type) * 8;
460  const uint64_t max_val = UINT64_MAX >> (64 - bits);
461 
462  /* Maximum value representable losslessly as float. Note that this is
463  * currently true only for U8, but that may change if we ever update the
464  * value of SWS_FILTER_SCALE. */
465  return max_val * SWS_FILTER_SCALE <= (1 << 22);
466 }
467 
469  const SwsOp *op)
470 {
471  SwsUOp uop = {
472  .type = op->type,
473  .mask = SWS_COMP_MASK(op->rw.elems > 0, op->rw.elems > 1,
474  op->rw.elems > 2, op->rw.elems > 3),
475  };
476 
477  /* Non-filtered reads don't care about the exact pixel contents */
478  if (!op->rw.filter)
479  uop.type = pixel_type_to_int(op->type);
480 
481  const bool is_read = op->op == SWS_OP_READ;
482  if (op->rw.filter) {
483  if (op->op == SWS_OP_WRITE || op->rw.frac || op->rw.packed)
484  return AVERROR(ENOTSUP);
486  uop.data.kernel = av_refstruct_ref(op->rw.kernel);
487  if (op->rw.filter == SWS_OP_FILTER_H) {
489  } else if (check_filter_fma(ctx, flags, op)) {
491  } else {
493  }
494  } else if (op->rw.packed && op->rw.elems > 1) {
495  if (op->rw.frac)
496  return AVERROR(ENOTSUP);
497  uop.uop = is_read ? SWS_UOP_READ_PACKED : SWS_UOP_WRITE_PACKED;
498  } else if (op->rw.frac == 3) {
499  uop.uop = is_read ? SWS_UOP_READ_BIT : SWS_UOP_WRITE_BIT;
500  } else if (op->rw.frac == 1) {
501  uop.uop = is_read ? SWS_UOP_READ_NIBBLE : SWS_UOP_WRITE_NIBBLE;
502  } else {
503  av_assert0(!op->rw.frac);
504  uop.uop = is_read ? SWS_UOP_READ_PLANAR : SWS_UOP_WRITE_PLANAR;
505  }
506 
507  return ff_sws_uop_list_append(ops, &uop);
508 }
509 
/**
 * Count the entries among the first `size` elements of `arr` that are
 * equal to `val`.
 */
static int count_idx(const int *arr, size_t size, int val)
{
    int matches = 0;
    size_t remaining = size;

    /* Visiting order is irrelevant for a plain count, so walk backwards */
    while (remaining--)
        matches += (arr[remaining] == val);

    return matches;
}
520 
521 static int translate_move(SwsUOpList *ops, const SwsOp *op)
522 {
523  SwsUOp uop = {
524  .uop = SWS_UOP_MOVE,
525  .type = pixel_type_to_int(op->type),
526  };
527  SwsMoveUOp *par = &uop.par.move;
528 
529  /* Mask of components that are not yet satisfied */
531  for (int i = 0; i < 4; i++) {
532  if (op->swizzle.in[i] == i)
533  todo &= ~SWS_COMP(i);
534  }
535 
536  /* Mask of components whose value is required for the final output */
537  SwsCompMask needed = 0;
538  for (int i = 0; i < 4; i++) {
539  if (SWS_OP_NEEDED(op, i))
540  needed |= SWS_COMP(op->swizzle.in[i]);
541  }
542 
543  /* Current mapping of registers to components */
544  int idx[4 + 1] = { 0, 1, 2, 3, -1 }; /* +1 for tmp */
545 
546  /* Decompose the swizzle mask into a series of register-register moves */
547  while (todo) {
548  int dst = -1, src = -1;
549 
550  /* Find next unsatisfied dst <- src move that doesn't clobber a value */
551  for (dst = 0; dst < 4; dst++) {
552  if (!SWS_COMP_TEST(todo, dst))
553  continue; /* already satisfied */
554  const int cur = idx[dst];
555  if (count_idx(idx, FF_ARRAY_ELEMS(idx), cur) == 1 && SWS_COMP_TEST(needed, cur))
556  continue; /* clobbers last remaining, still-needed value */
557  for (src = 0; src < FF_ARRAY_ELEMS(idx); src++) {
558  if (idx[src] == op->swizzle.in[dst]) {
559  /* Prevent read-after-write dependency. */
560  if (par->num_moves > 0 && src == par->dst[par->num_moves - 1])
561  src = par->src[par->num_moves - 1];
562  break;
563  }
564  }
565  av_assert1(src < FF_ARRAY_ELEMS(idx));
566  todo &= ~SWS_COMP(dst);
567  break;
568  }
569 
570  if (dst == 4) {
571  /* Stuck in a cycle, break it by saving to the scratch register */
572  dst = 4;
573  for (src = 0; src < 4; src++) {
574  if (SWS_COMP_TEST(todo, src)) {
575  needed &= ~SWS_COMP(idx[src]);
576  break;
577  }
578  }
579  av_assert1(src < 4);
580  }
581 
583  par->dst[par->num_moves] = dst > 3 ? -1 : dst;
584  par->src[par->num_moves] = src > 3 ? -1 : src;
585  par->num_moves++;
586  idx[dst] = idx[src];
587  }
588 
589  return ff_sws_uop_list_append(ops, &uop);
590 }
591 
593 {
594  if (flags & SWS_UOP_FLAG_MOVE)
595  return translate_move(ops, op);
596 
597  SwsUOp uop = {
598  .type = pixel_type_to_int(op->type),
599  .uop = SWS_UOP_PERMUTE,
600  .mask = ff_sws_comp_mask_needed(op),
601  .par.swizzle.in = {0, 1, 2, 3},
602  };
603 
604  SwsCompMask seen = 0;
605  for (int i = 0; i < 4; i++) {
606  if (!SWS_COMP_TEST(uop.mask, i))
607  continue;
608  const int src = op->swizzle.in[i];
609  if (SWS_COMP_TEST(seen, src))
610  uop.uop = SWS_UOP_COPY; /* Swizzle mask contains duplicates */
611  seen |= SWS_COMP(src);
612  uop.par.swizzle.in[i] = src;
613  }
614 
615  if (uop.uop == SWS_UOP_PERMUTE) {
616  /* Prevent overlap by moving unused components to unseen indices */
617  for (int i = 0; i < 4; i++) {
618  if (SWS_COMP_TEST(uop.mask, i))
619  continue;
620 
621  /* Prefer identity mapping if possible */
622  int unused = i;
623  if (SWS_COMP_TEST(seen, i)) {
624  for (int j = 0; j < 4; j++) {
625  if (!SWS_COMP_TEST(seen, j)) {
626  unused = j;
627  break;
628  }
629  }
630  }
631 
632  uop.par.swizzle.in[i] = unused;
633  seen |= SWS_COMP(unused);
634  }
635  }
636 
637  /* Remove remaining trivial / identity components from the mask */
638  for (int i = 0; i < 4; i++) {
639  if (uop.par.swizzle.in[i] == i)
640  uop.mask &= ~SWS_COMP(i);
641  }
642 
643  return ff_sws_uop_list_append(ops, &uop);
644 }
645 
646 static int translate_dither_op(SwsUOpList *ops, const SwsOp *op)
647 {
648  SwsUOp uop = {
649  .type = op->type,
650  .uop = SWS_UOP_DITHER,
651  .par.dither.size_log2 = op->dither.size_log2,
652  };
653 
654  if (op->dither.size_log2 == 0) {
655  /* Constant offset */
656  const SwsPixel val = Q2PIXEL(op->dither.matrix[0]);
657  uop.uop = SWS_UOP_ADD;
658  for (int i = 0; i < 4; i++) {
659  if (!SWS_OP_NEEDED(op, i) || op->dither.y_offset[i] < 0)
660  continue;
661  uop.mask |= SWS_COMP(i);
662  uop.data.vec4[i] = val;
663  }
664 
665  return ff_sws_uop_list_append(ops, &uop);
666  }
667 
668  const int size = 1 << op->dither.size_log2;
669  for (int i = 0; i < 4; i++) {
670  if (!SWS_OP_NEEDED(op, i) || op->dither.y_offset[i] < 0)
671  continue;
672  const uint8_t off = op->dither.y_offset[i] & (size - 1);
673  uop.mask |= SWS_COMP(i);
674  uop.par.dither.y_offset[i] = off;
675  }
676 
677  /* Allocate extra rows to allow over-reading for row offsets. Note that
678  * y_offset is currently never larger than 5, so the extra space needed
679  * for this over-allocation is bounded by 5 * size * sizeof(float),
680  * typically 320 bytes for a 16x16 dither matrix. */
681  const int stride = size * sizeof(SwsPixel);
682  const int num_rows = ff_sws_dither_height(&uop.par.dither);
683  SwsPixel *matrix = uop.data.ptr = av_refstruct_allocz(num_rows * stride);
684  if (!matrix)
685  return AVERROR(ENOMEM);
686 
687  for (int i = 0; i < size * size; i++)
688  matrix[i] = Q2PIXEL(op->dither.matrix[i]);
689  memcpy(&matrix[size * size], matrix, (num_rows - size) * stride);
690 
691  return ff_sws_uop_list_append(ops, &uop);
692 }
693 
695  SwsUOpFlags flags, const SwsOp *op,
696  const SwsComps *input)
697 {
698  SwsUOp uop = {
699  .type = op->type,
700  .uop = SWS_UOP_LINEAR,
701  };
702 
703  const bool bitexact = ctx->flags & SWS_BITEXACT;
704  uint32_t exact = 0;
705 
706  for (int i = 0; i < 4; i++) {
707  if (SWS_OP_NEEDED(op, i) && (op->lin.mask & SWS_MASK_ROW(i)))
708  uop.mask |= SWS_COMP(i);
709  for (int j = 0; j < 5; j++) {
710  const AVRational k = op->lin.m[i][j];
711  const SwsPixel px = Q2PIXEL(k);
712  uop.data.mat4[i][j] = px;
713  if (k.num == 0)
714  uop.par.lin.zero |= SWS_MASK(i, j);
715  else if (k.num == k.den)
716  uop.par.lin.one |= SWS_MASK(i, j);
717  else if (j < 4 && (!bitexact || exact_prod(uop.type, px, input, j)))
718  exact |= SWS_MASK(i, j);
719  }
720  }
721 
722  if (flags & SWS_UOP_FLAG_FMA) {
723  /* multiplication by 1 and 0 are always exact by definition */
724  uop.uop = SWS_UOP_LINEAR_FMA;
725  uop.par.lin.exact = exact | uop.par.lin.zero | uop.par.lin.one;
726  }
727 
728  return ff_sws_uop_list_append(ops, &uop);
729 }
730 
732 {
733  if (factor.den != 1)
734  return false;
735 
736  switch (type) {
737  case SWS_PIXEL_U8: return factor.num == UINT8_MAX;
738  case SWS_PIXEL_U16: return factor.num == UINT16_MAX;
739  case SWS_PIXEL_U32: return factor.num == UINT32_MAX;
740  case SWS_PIXEL_F32: return false;
741  case SWS_PIXEL_NONE:
742  case SWS_PIXEL_TYPE_NB: break;
743  }
744 
745  av_unreachable("Invalid pixel type!");
746  return false;
747 }
748 
750  const SwsOp *op, const SwsComps *input)
751 {
752  switch (op->op) {
753  case SWS_OP_FILTER_H:
754  case SWS_OP_FILTER_V:
755  return AVERROR(ENOTSUP); /* always handled by subpass splitting */
756  case SWS_OP_READ:
757  case SWS_OP_WRITE:
758  return translate_rw_op(ctx, uops, flags, op);
759  case SWS_OP_SWIZZLE:
760  return translate_swizzle(uops, flags, op);
761  case SWS_OP_DITHER:
762  return translate_dither_op(uops, op);
763  case SWS_OP_LINEAR:
764  return translate_linear_op(ctx, uops, flags, op, input);
765  default:
766  break;
767  }
768 
769  /* Default handling for "simple" ops */
770  SwsUOp uop = {
771  .type = op->type,
772  .uop = SWS_UOP_INVALID,
773  .mask = ff_sws_comp_mask_needed(op),
774  };
775 
776  switch (op->op) {
777  case SWS_OP_CONVERT:
778  if (op->convert.expand) {
779  av_assert0(op->type == SWS_PIXEL_U8);
780  switch (op->convert.to) {
781  case SWS_PIXEL_U16: uop.uop = SWS_UOP_EXPAND_PAIR; break;
782  case SWS_PIXEL_U32: uop.uop = SWS_UOP_EXPAND_QUAD; break;
783  }
784  } else {
785  switch (op->convert.to) {
786  case SWS_PIXEL_U8: uop.uop = SWS_UOP_TO_U8; break;
787  case SWS_PIXEL_U16: uop.uop = SWS_UOP_TO_U16; break;
788  case SWS_PIXEL_U32: uop.uop = SWS_UOP_TO_U32; break;
789  case SWS_PIXEL_F32: uop.uop = SWS_UOP_TO_F32; break;
790  }
791  }
792  break;
793  case SWS_OP_UNPACK:
794  case SWS_OP_PACK:
795  uop.uop = op->op == SWS_OP_PACK ? SWS_UOP_PACK : SWS_UOP_UNPACK;
796  uop.mask = 0;
797  for (int i = 0; i < 4 && op->pack.pattern[i]; i++) {
798  uop.par.pack.pattern[i] = op->pack.pattern[i];
799  uop.mask |= SWS_COMP(i);
800  }
801  break;
802  case SWS_OP_LSHIFT:
803  case SWS_OP_RSHIFT:
805  uop.par.shift.amount = op->shift.amount;
806  break;
807  case SWS_OP_CLEAR:
808  uop.uop = SWS_UOP_CLEAR;
809  uop.type = pixel_type_to_int(op->type);
810  uop.mask &= op->clear.mask;
811  for (int i = 0; i < 4; i++) {
812  if (!SWS_COMP_TEST(op->clear.mask, i))
813  continue;
814  const AVRational v = op->clear.value[i];
815  const SwsPixel px = Q2PIXEL(op->clear.value[i]);
816  uop.data.vec4[i] = px;
817  if (v.num == 0)
818  uop.par.clear.zero |= SWS_COMP(i);
819  else if (pixel_is_1s(op->type, px))
820  uop.par.clear.one |= SWS_COMP(i);
821  }
822  break;
823  case SWS_OP_SCALE:
824  if (is_expand_bit(op->type, op->scale.factor)) {
825  uop.uop = SWS_UOP_EXPAND_BIT;
826  } else {
827  uop.uop = SWS_UOP_SCALE;
828  uop.data.scalar = Q2PIXEL(op->scale.factor);
829  }
830  break;
831  case SWS_OP_MIN:
832  case SWS_OP_MAX:
833  uop.uop = op->op == SWS_OP_MIN ? SWS_UOP_MIN : SWS_UOP_MAX;
834  uop.mask &= ff_sws_comp_mask_q4(op->clamp.limit);
835  for (int i = 0; i < 4; i++) {
836  if (SWS_COMP_TEST(uop.mask, i))
837  uop.data.vec4[i] = Q2PIXEL(op->clamp.limit[i]);
838  }
839  break;
840  case SWS_OP_SWAP_BYTES:
841  uop.uop = SWS_UOP_SWAP_BYTES;
842  uop.type = pixel_type_to_int(op->type);
843  break;
844  }
845 
847  return ff_sws_uop_list_append(uops, &uop);
848 }
849 
852 {
853  SwsComps input = ops->comps_src;
854  for (int i = 0; i < ops->num_ops; i++) {
855  int ret = translate_op(ctx, uops, flags, &ops->ops[i], &input);
856  if (ret < 0)
857  return ret;
858  input = ops->ops[i].comps;
859  }
860  return 0;
861 }
862 
863 static int register_uop(struct AVTreeNode **root, const SwsUOp *uop)
864 {
865  SwsUOp *key = av_memdup(uop, sizeof(*uop));
866  if (!key)
867  return AVERROR(ENOMEM);
868  memset(&key->data, 0, sizeof(key->data));
869 
870  struct AVTreeNode *node = av_tree_node_alloc();
871  if (!node) {
872  av_free(key);
873  return AVERROR(ENOMEM);
874  }
875 
876  av_tree_insert(root, key, ff_sws_uop_cmp_v, &node);
877  if (node) {
878  av_free(node);
879  av_free(key);
880  }
881  return 0;
882 }
883 
885 {
887  if (!uops)
888  return AVERROR(ENOMEM);
889 
890  int ret = ff_sws_ops_translate(ctx, ops, flags, uops);
891  if (ret < 0)
892  goto fail;
893 
894  struct AVTreeNode **root = ctx->opaque;
895  for (int i = 0; i < uops->num_ops; i++) {
896  ret = register_uop(root, &uops->ops[i]);
897  if (ret < 0)
898  goto fail;
899  }
900 
901 fail:
902  ff_sws_uop_list_free(&uops);
903  return ret;
904 }
905 
906 static const SwsUOpFlags uop_flags[] = {
907  0,
908  SWS_UOP_FLAG_FMA | SWS_UOP_FLAG_MOVE, /* x86 backend */
909 };
910 
911 static int register_uops(SwsContext *ctx, const SwsOpList *ops,
913 {
914  for (int i = 0; i < FF_ARRAY_ELEMS(uop_flags); i++) {
915  int ret = register_flags(ctx, ops, uop_flags[i]);
916  if (ret < 0)
917  return ret;
918  }
919 
920  *out = (SwsCompiledOp) {0}; /* dummy value, will be immediately freed */
921  return 0;
922 }
923 
924 /* Dummy backend that just registers all seen uops */
925 static const SwsOpBackend backend_uops = {
926  .name = "uops_gen",
927  .compile = register_uops,
928 };
929 
930 static int register_all_uops(SwsContext *ctx, void *graph, SwsOpList *ops)
931 {
932  /* ff_sws_compile_pass() takes over ownership of `ops` */
934  if (!copy)
935  return AVERROR(ENOMEM);
936 
937  return ff_sws_compile_pass(graph, &backend_uops, &copy, 0, NULL, NULL);
938 }
939 
940 static const SwsFlags flags[] = {
941  0,
942  SWS_ACCURATE_RND, /* may insert extra 1x1 dither ops (for accurate rounding) */
943  SWS_BITEXACT, /* prevents some FMA optimizations */
945 };
946 
947 /* Limit the range of av_tree_enumerate() to only matching uop and type */
948 static int enum_type(void *opaque, void *elem)
949 {
950  const SwsUOp *a = opaque, *b = elem;
951  if (a->type != b->type)
952  return (int) b->type - a->type;
953  if (a->uop != b->uop)
954  return (int) b->uop - a->uop;
955  return 0;
956 }
957 
/* Callback that releases `key` with av_free(); `opaque` is ignored.
 * Always returns 0. */
static int free_uop_key(void *opaque, void *key)
{
    void *entry = key;
    av_free(entry);
    return 0;
}
963 
964 int ff_sws_uops_macros_gen(char **out_str)
965 {
966  int ret;
967  struct AVTreeNode *root = NULL;
968 
969  AVBPrint bprint, *const bp = &bprint;
971 
972  /* Allocate dummy graph and context for ff_sws_compile_pass() */
973  SwsGraph *graph = ff_sws_graph_alloc();
974  if (!graph)
975  return AVERROR(ENOMEM);
976 
977  SwsContext *ctx = graph->ctx = sws_alloc_context();
978  if (!ctx) {
979  ret = AVERROR(ENOMEM);
980  goto fail;
981  }
982 
983  /* Use this to plumb the tree state through all the layers of abstraction */
984  ctx->opaque = &root;
985  ctx->scaler = SWS_SCALE_BILINEAR; /* cheaper to generate filter kernels */
986 
987  /* Register all unique uops over every relevant combination of flags */
988  for (int i = 0; i < FF_ARRAY_ELEMS(flags); i++) {
989  ctx->flags = flags[i];
992  if (ret < 0)
993  goto fail;
994  }
995 
996  /**
997  * Additionally make sure planar reads/writes are always available for all
998  * formats, because checkasm depends on them to be able to verify the
999  * input/output of any other operations.
1000  */
1003  continue;
1004  for (int elems = 1; elems <= 4; elems++) {
1005  for (int rw = 0; rw < 2; rw++) {
1006  SwsUOp uop = {
1007  .type = type,
1009  .mask = SWS_COMP_ELEMS(elems),
1010  };
1011 
1012  ret = register_uop(&root, &uop);
1013  if (ret < 0)
1014  goto fail;
1015  }
1016  }
1017  }
1018 
1019  #define BPRINT_STR(str) av_bprint_append_data(bp, str, strlen(str))
1020  BPRINT_STR(
1021 "/**\n"
1022 " * This file is automatically generated. Do not edit manually.\n"
1023 " * To regenerate, run: make fate-sws-uops-macros GEN=1\n"
1024 " */\n"
1025 "\n"
1026 "#ifndef SWSCALE_UOPS_MACROS_H\n"
1027 "#define SWSCALE_UOPS_MACROS_H\n"
1028 "\n"
1029 "/**\n"
1030 " * Boilerplate helper macros, for template-based backends. These will be\n"
1031 " * instantiated like this, with parameters in struct order:\n"
1032 " * MACRO(__VA_ARGS__, NAME, UOP, TYPE, MASK, [PARAMS,])\n"
1033 " * The _STRUCT variants pass all arguments in C struct syntax, while the\n"
1034 " * plain variants give them as separate C values (e.g. for use in calls)\n"
1035 " */\n"
1036 "#define SWS_GLUE3(x, y, z) x ## _ ## y ## _ ## z\n"
1037 "#define SWS_FOR(TYPE, UOP, MACRO, ...) \\\n"
1038 " SWS_GLUE3(SWS_FOR, TYPE, UOP)(MACRO, __VA_ARGS__)\n"
1039 "#define SWS_FOR_STRUCT(TYPE, UOP, MACRO, ...) \\\n"
1040 " SWS_GLUE3(SWS_FOR_STRUCT, TYPE, UOP)(MACRO, __VA_ARGS__)\n"
1041 "\n");
1042 
1043  SwsUOp key = { .data.opaque = bp };
1044  for (key.type = SWS_PIXEL_NONE + 1; key.type < SWS_PIXEL_TYPE_NB; key.type++) {
1045  for (key.uop = SWS_UOP_INVALID + 1; key.uop < SWS_UOP_TYPE_NB; key.uop++) {
1046  const char *macro = uop_names[key.uop].macro;
1047  const char *prefix = pixel_types[key.type].prefix;
1048  av_bprintf(bp, "#define SWS_FOR_%s%s(MACRO, ...)", prefix, macro);
1050  av_bprintf(bp, "\n");
1051  av_bprintf(bp, "#define SWS_FOR_STRUCT_%s%s(MACRO, ...)", prefix, macro);
1053  av_bprintf(bp, "\n");
1054  }
1055  }
1056 
1057  BPRINT_STR("\n#endif /* SWSCALE_UOPS_MACROS_H */");
1058  ret = av_bprint_finalize(bp, out_str);
1059 
1060 fail:
1061  av_bprint_finalize(bp, NULL);
1063  av_tree_destroy(root);
1064  ff_sws_graph_free(&graph);
1066  return ret;
1067 }
SWS_OP_READ
@ SWS_OP_READ
Definition: ops.h:38
pixel_from_q
static SwsPixel pixel_from_q(SwsPixelType type, AVRational val)
Definition: uops.c:98
factor
static const int factor[16]
Definition: vf_pp7.c:98
AV_BPRINT_SIZE_UNLIMITED
#define AV_BPRINT_SIZE_UNLIMITED
SWS_UOP_SCALE
@ SWS_UOP_SCALE
Definition: uops.h:121
name
it's the only field you need to keep assuming you have a context There is some magic you don't need to care about around this just let it vf default minimum maximum flags name is the option name
Definition: writing_filters.txt:88
SWS_OP_SWIZZLE
@ SWS_OP_SWIZZLE
Definition: ops.h:41
SwsGraph::ctx
SwsContext * ctx
Definition: graph.h:123
Q2PIXEL
#define Q2PIXEL(val)
Definition: uops.c:114
av_bprint_is_complete
static int av_bprint_is_complete(const AVBPrint *buf)
Test if the print buffer is complete (not truncated).
Definition: bprint.h:218
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
MAX
#define MAX
Definition: blend_modes.c:46
SwsUOpParams::move
SwsMoveUOp move
Definition: uops.h:194
SWS_OP_LSHIFT
@ SWS_OP_LSHIFT
Definition: ops.h:46
SWS_OP_UNPACK
@ SWS_OP_UNPACK
Definition: ops.h:44
ff_sws_op_list_duplicate
SwsOpList * ff_sws_op_list_duplicate(const SwsOpList *ops)
Returns a duplicate of ops, or NULL on OOM.
Definition: ops.c:634
free_uop_key
static int free_uop_key(void *opaque, void *key)
Definition: uops.c:958
out
static FILE * out
Definition: movenc.c:55
av_bprint_init
void av_bprint_init(AVBPrint *buf, unsigned size_init, unsigned size_max)
Definition: bprint.c:69
SwsOpList::comps_src
SwsComps comps_src
Source component metadata associated with pixel values from each corresponding component (in plane/me...
Definition: ops.h:277
ff_sws_uop_cmp_v
static int ff_sws_uop_cmp_v(const void *a, const void *b)
Definition: uops.h:224
SWS_UOP_RSHIFT
@ SWS_UOP_RSHIFT
Definition: uops.h:130
SWS_PIXEL_NONE
@ SWS_PIXEL_NONE
Definition: uops.h:39
av_tree_insert
void * av_tree_insert(AVTreeNode **tp, void *key, int(*cmp)(const void *key, const void *b), AVTreeNode **next)
Insert or remove an element.
Definition: tree.c:59
SWS_OP_CLEAR
@ SWS_OP_CLEAR
Definition: ops.h:50
SWS_SCALE_BILINEAR
@ SWS_SCALE_BILINEAR
bilinear filtering
Definition: swscale.h:98
SwsClearUOp::zero
SwsCompMask zero
Definition: uops.h:168
matrix
Definition: vc1dsp.c:43
full
char full[32]
Definition: uops.c:44
SwsUOp::data
union SwsUOp::@585 data
ops.h
u
#define u(width, name, range_min, range_max)
Definition: cbs_apv.c:68
AVTreeNode::elem
void * elem
Definition: tree.c:28
SWS_OP_DITHER
@ SWS_OP_DITHER
Definition: ops.h:58
SWS_BITEXACT
@ SWS_BITEXACT
Definition: swscale.h:180
av_dynarray2_add
void * av_dynarray2_add(void **tab_ptr, int *nb_ptr, size_t elem_size, const uint8_t *elem_data)
Add an element of size elem_size to a dynamic array.
Definition: mem.c:343
b
#define b
Definition: input.c:43
SWS_UOP_MOVE_MAX
#define SWS_UOP_MOVE_MAX
Definition: uops.h:154
SWS_UOP_LINEAR_FMA
@ SWS_UOP_LINEAR_FMA
Definition: uops.h:133
SWS_UOP_MAX
@ SWS_UOP_MAX
Definition: uops.h:124
translate_rw_op
static int translate_rw_op(SwsContext *ctx, SwsUOpList *ops, SwsUOpFlags flags, const SwsOp *op)
Definition: uops.c:468
ff_sws_uop_cmp
int ff_sws_uop_cmp(const SwsUOp *a, const SwsUOp *b)
Copyright (C) 2026 Niklas Haas.
Definition: uops.c:32
max
#define max(a, b)
Definition: cuda_runtime.h:33
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
SWS_COMP_MASK
#define SWS_COMP_MASK(X, Y, Z, W)
Definition: uops.h:74
av_tree_node_alloc
struct AVTreeNode * av_tree_node_alloc(void)
Allocate an AVTreeNode.
Definition: tree.c:34
SwsUOpParams::swizzle
SwsSwizzleUOp swizzle
Definition: uops.h:193
SWS_UOP_LSHIFT
@ SWS_UOP_LSHIFT
Definition: uops.h:129
SwsLinearUOp::one
uint32_t one
Definition: uops.h:172
SWS_UOP_TYPE_NB
@ SWS_UOP_TYPE_NB
Definition: uops.h:137
SwsOpBackend::name
const char * name
Definition: ops_dispatch.h:134
SWS_UOP_NAME_MAX
#define SWS_UOP_NAME_MAX
Generate a unique name for a SwsUOp.
Definition: uops.h:232
ff_sws_pixel_type_size
int ff_sws_pixel_type_size(SwsPixelType type)
Definition: ops.c:77
av_tree_enumerate
void av_tree_enumerate(AVTreeNode *t, void *opaque, int(*cmp)(void *opaque, void *elem), int(*enu)(void *opaque, void *elem))
Apply enu(opaque, &elem) to all the elements in the tree in a given range.
Definition: tree.c:155
check_filter_fma
static bool check_filter_fma(SwsContext *ctx, SwsUOpFlags flags, const SwsOp *op)
Definition: uops.c:450
ff_sws_graph_alloc
SwsGraph * ff_sws_graph_alloc(void)
Allocate an empty SwsGraph.
Definition: graph.c:827
ff_sws_comp_mask_needed
SwsCompMask ff_sws_comp_mask_needed(const SwsOp *op)
Definition: ops.c:159
SWS_MASK_ROW
#define SWS_MASK_ROW(I)
Definition: ops.h:182
SwsMoveUOp::num_moves
int num_moves
Definition: uops.h:155
av_memdup
void * av_memdup(const void *p, size_t size)
Duplicate a buffer with av_malloc().
Definition: mem.c:304
enum_type
static int enum_type(void *opaque, void *elem)
Definition: uops.c:948
SwsComps::max
AVRational max[4]
Definition: ops.h:84
SwsMoveUOp
Definition: uops.h:152
SWS_COMP_TEST
#define SWS_COMP_TEST(mask, X)
Definition: uops.h:71
av_bprint_init_for_buffer
void av_bprint_init_for_buffer(AVBPrint *buf, char *buffer, unsigned size)
Init a print buffer using a pre-existing buffer.
Definition: bprint.c:85
SWS_UOP_TO_U16
@ SWS_UOP_TO_U16
Definition: uops.h:116
SwsOpList::num_ops
int num_ops
Definition: ops.h:260
SWS_UOP_PACK
@ SWS_UOP_PACK
Definition: uops.h:128
SwsShiftUOp::amount
uint8_t amount
Definition: uops.h:145
SWS_UOP_PERMUTE
@ SWS_UOP_PERMUTE
Definition: uops.h:106
SwsUOpParams::pack
SwsPackUOp pack
Definition: uops.h:195
SWS_UOP_EXPAND_BIT
@ SWS_UOP_EXPAND_BIT
Definition: uops.h:112
translate_move
static int translate_move(SwsUOpList *ops, const SwsOp *op)
Definition: uops.c:521
UOP_NAME
#define UOP_NAME(OP, ABBR)
ff_sws_pixel_type_is_int
bool ff_sws_pixel_type_is_int(SwsPixelType type)
Definition: ops.c:92
val
static double val(void *priv, double ch)
Definition: aeval.c:77
type
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf type
Definition: writing_filters.txt:86
SwsUOpParams
Definition: uops.h:190
SWS_COMP_ELEMS
#define SWS_COMP_ELEMS(N)
Definition: uops.h:73
AVRational::num
int num
Numerator.
Definition: rational.h:59
SwsFilterUOp::type
SwsPixelType type
Definition: uops.h:141
refstruct.h
av_refstruct_allocz
static void * av_refstruct_allocz(size_t size)
Equivalent to av_refstruct_alloc_ext(size, 0, NULL, NULL)
Definition: refstruct.h:105
SWS_UOP_COPY
@ SWS_UOP_COPY
Definition: uops.h:107
SWS_UOP_INVALID
@ SWS_UOP_INVALID
Definition: uops.h:89
SWS_OP_SCALE
@ SWS_OP_SCALE
Definition: ops.h:54
avassert.h
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
SWS_UOP_WRITE_NIBBLE
@ SWS_UOP_WRITE_NIBBLE
Definition: uops.h:102
uop_flags
static const SwsUOpFlags uop_flags[]
Definition: uops.c:906
SWS_OP_NEEDED
#define SWS_OP_NEEDED(op, idx)
Definition: ops.h:235
SwsUOp::kernel
SwsFilterWeights * kernel
Definition: uops.h:210
float
float
Definition: af_crystalizer.c:122
SWS_UOP_MOVE
@ SWS_UOP_MOVE
Definition: uops.h:108
SwsFlags
SwsFlags
Definition: swscale.h:133
dither
static const uint16_t dither[8][8]
Definition: vf_gradfun.c:46
SwsUOp::uop
SwsUOpType uop
Definition: uops.h:204
AVFormatContext::flags
int flags
Flags modifying the (de)muxer behaviour.
Definition: avformat.h:1465
backend_uops
static const SwsOpBackend backend_uops
Definition: uops.c:925
SwsComps::min
AVRational min[4]
Definition: ops.h:84
SWS_UOP_WRITE_PLANAR
@ SWS_UOP_WRITE_PLANAR
Definition: uops.h:100
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
SCALE
#define SCALE(c)
Definition: dcadata.c:7338
bits
uint8_t bits
Definition: vp3data.h:128
SWS_UOP_TO_F32
@ SWS_UOP_TO_F32
Definition: uops.h:118
LINEAR
#define LINEAR
Definition: vf_perspective.c:36
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:42
SWS_UOP_MIN
@ SWS_UOP_MIN
Definition: uops.h:123
SWS_OP_MIN
@ SWS_OP_MIN
Definition: ops.h:52
ctx
static AVFormatContext * ctx
Definition: movenc.c:49
SwsCompMask
uint8_t SwsCompMask
Bit-mask of components.
Definition: uops.h:61
SWS_UOP_READ_PACKED
@ SWS_UOP_READ_PACKED
Definition: uops.h:96
SWS_OP_LINEAR
@ SWS_OP_LINEAR
Definition: ops.h:57
count_idx
static int count_idx(const int *arr, size_t size, int val)
Definition: uops.c:510
SWS_OP_FILTER_H
@ SWS_OP_FILTER_H
Definition: ops.h:61
COPY
#define COPY(src, name)
SwsPixel::f32
float f32
Definition: uops.h:57
AVFormatContext::opaque
void * opaque
User data.
Definition: avformat.h:1878
key
const char * key
Definition: hwcontext_opencl.c:189
av_mallocz
#define av_mallocz(s)
Definition: tableprint_vlc.h:31
RSHIFT
#define RSHIFT(a, b)
Definition: common.h:56
SwsOpBackend
Definition: ops_dispatch.h:133
SWS_OP_PACK
@ SWS_OP_PACK
Definition: ops.h:45
prefix
char prefix[8]
Definition: uops.c:89
register_flags
static int register_flags(SwsContext *ctx, const SwsOpList *ops, SwsUOpFlags flags)
Definition: uops.c:884
fail
#define fail
Definition: test.h:478
result
and forward the result(frame or status change) to the corresponding input. If nothing is possible
NULL
#define NULL
Definition: coverity.c:32
SwsUOp::mat4
SwsPixel mat4[4][5]
Definition: uops.h:214
ADD
#define ADD(a, b)
Definition: dct32_template.c:123
SWS_PIXEL_TYPE_NB
@ SWS_PIXEL_TYPE_NB
Definition: uops.h:44
SwsUOpParams::shift
SwsShiftUOp shift
Definition: uops.h:192
flags
static const SwsFlags flags[]
Definition: uops.c:940
translate_swizzle
static int translate_swizzle(SwsUOpList *ops, SwsUOpFlags flags, const SwsOp *op)
Definition: uops.c:592
AVRational
Rational number (pair of numerator and denominator).
Definition: rational.h:58
av_unreachable
#define av_unreachable(msg)
Asserts that are used as compiler optimization hints depending upon ASSERT_LEVEL and NDEBUG.
Definition: avassert.h:116
SwsMoveUOp::dst
int8_t dst[SWS_UOP_MOVE_MAX]
Definition: uops.h:158
AVTreeNode
Definition: tree.c:26
SwsClearUOp::one
SwsCompMask one
Definition: uops.h:167
SWS_UOP_FLAG_MOVE
@ SWS_UOP_FLAG_MOVE
Definition: uops.h:85
SWS_OP_FILTER_V
@ SWS_OP_FILTER_V
Definition: ops.h:62
SWS_UOP_READ_NIBBLE
@ SWS_UOP_READ_NIBBLE
Definition: uops.h:97
MOVE
#define MOVE
Definition: rasc.c:45
SWS_UOP_ADD
@ SWS_UOP_ADD
Definition: uops.h:122
generate_entry_struct
static int generate_entry_struct(void *opaque, void *key)
Definition: uops.c:223
translate_dither_op
static int translate_dither_op(SwsUOpList *ops, const SwsOp *op)
Definition: uops.c:646
av_tree_destroy
void av_tree_destroy(AVTreeNode *t)
Definition: tree.c:146
SWS_MASK
#define SWS_MASK(I, J)
Definition: ops.h:180
SwsPixelType
SwsPixelType
Definition: uops.h:38
pixel_is_1s
static bool pixel_is_1s(SwsPixelType type, SwsPixel val)
Definition: uops.c:116
SwsUOp::par
SwsUOpParams par
Definition: uops.h:206
SWS_UOP_TO_U32
@ SWS_UOP_TO_U32
Definition: uops.h:117
exact_prod
static bool exact_prod(SwsPixelType type, SwsPixel coef, const SwsComps *comps, int idx)
Definition: uops.c:428
ff_sws_graph_free
void ff_sws_graph_free(SwsGraph **pgraph)
Uninitialize any state associated with this filter graph and free it.
Definition: graph.c:916
SwsUOp
Definition: uops.h:201
SWS_UOP_WRITE_BIT
@ SWS_UOP_WRITE_BIT
Definition: uops.h:103
uop_uninit
static void uop_uninit(SwsUOp *uop)
Definition: uops.c:352
copy
static void copy(const float *p1, float *p2, const int length)
Definition: vf_vaguedenoiser.c:186
av_bprint_finalize
int av_bprint_finalize(AVBPrint *buf, char **ret_str)
Finalize a print buffer.
Definition: bprint.c:235
sws_alloc_context
SwsContext * sws_alloc_context(void)
Allocate an empty SwsContext and set its fields to default values.
Definition: utils.c:1043
SWS_UOP_READ_PLANAR_FV_FMA
@ SWS_UOP_READ_PLANAR_FV_FMA
Definition: uops.h:95
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:87
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
SwsLinearUOp::zero
uint32_t zero
Definition: uops.h:173
SwsUOp::mask
SwsCompMask mask
Definition: uops.h:205
SwsDitherUOp::size_log2
uint8_t size_log2
Definition: uops.h:181
size
int size
Definition: twinvq_data.h:10344
SWS_OP_RSHIFT
@ SWS_OP_RSHIFT
Definition: ops.h:47
pixel_types
static const struct @583 pixel_types[SWS_PIXEL_TYPE_NB]
SWS_OP_WRITE
@ SWS_OP_WRITE
Definition: ops.h:39
SWS_UOP_UNPACK
@ SWS_UOP_UNPACK
Definition: uops.h:127
SWS_COMP
#define SWS_COMP(X)
Definition: uops.h:70
tree.h
SWS_PIXEL_U32
@ SWS_PIXEL_U32
Definition: uops.h:42
av_refstruct_ref
void * av_refstruct_ref(void *obj)
Create a new reference to an object managed via this API, i.e.
Definition: refstruct.c:140
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
SwsPixel
Definition: uops.h:51
SwsOp::comps
SwsComps comps
Metadata about the operation's input/output components.
Definition: ops.h:232
register_uop
static int register_uop(struct AVTreeNode **root, const SwsUOp *uop)
Definition: uops.c:863
ff_sws_uops_macros_gen
int ff_sws_uops_macros_gen(char **out_str)
Generate a set of boilerplate C preprocessor macros for describing and programmatically iterating ove...
Definition: uops.c:964
input
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
Definition: filter_design.txt:172
ff_sws_uop_list_alloc
SwsUOpList * ff_sws_uop_list_alloc(void)
Definition: uops.c:382
av_refstruct_unref
void av_refstruct_unref(void *objp)
Decrement the reference count of the underlying object and automatically free the object if there are...
Definition: refstruct.c:120
ff_sws_comp_mask_q4
SwsCompMask ff_sws_comp_mask_q4(const AVRational q[4])
Definition: ops.c:137
uop_names
static const struct @582 uop_names[SWS_UOP_TYPE_NB]
is_expand_bit
static bool is_expand_bit(SwsPixelType type, AVRational factor)
Definition: uops.c:731
register_all_uops
static int register_all_uops(SwsContext *ctx, void *graph, SwsOpList *ops)
Definition: uops.c:930
SWS_UOP_TO_U8
@ SWS_UOP_TO_U8
Definition: uops.h:115
exact_product_f32
static bool exact_product_f32(float a, float b)
Definition: uops.c:421
SWS_UOP_READ_PLANAR
@ SWS_UOP_READ_PLANAR
Definition: uops.h:92
SwsOpList::ops
SwsOp * ops
Definition: ops.h:259
av_assert1
#define av_assert1(cond)
assert() equivalent, for checks that are not located in speed-critical code.
Definition: avassert.h:58
SWS_PIXEL_U8
@ SWS_PIXEL_U8
Definition: uops.h:40
needed
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is needed
Definition: filter_design.txt:212
SWS_UOP_SWAP_BYTES
@ SWS_UOP_SWAP_BYTES
Definition: uops.h:111
SwsUOp::scalar
SwsPixel scalar
Definition: uops.h:212
ops_internal.h
MIN
#define MIN(a, b)
Definition: qt-faststart.c:45
SWS_UOP_LINEAR
@ SWS_UOP_LINEAR
Definition: uops.h:132
ff_sws_enum_op_lists
int ff_sws_enum_op_lists(SwsContext *ctx, void *opaque, enum AVPixelFormat src_fmt, enum AVPixelFormat dst_fmt, int(*cb)(SwsContext *ctx, void *opaque, SwsOpList *ops))
Helper function to enumerate over all possible (optimized) operation lists, under the current set of ...
Definition: ops.c:1059
SwsOp
Definition: ops.h:208
SwsUOpParams::lin
SwsLinearUOp lin
Definition: uops.h:197
SwsPackUOp::pattern
uint8_t pattern[4]
Definition: uops.h:163
abbr
char abbr[32]
Definition: uops.c:45
SwsUOp::type
SwsPixelType type
Definition: uops.h:203
pixel_type_to_int
static SwsPixelType pixel_type_to_int(const SwsPixelType type)
Definition: uops.c:408
ff_sws_ops_translate
int ff_sws_ops_translate(SwsContext *ctx, const SwsOpList *ops, SwsUOpFlags flags, SwsUOpList *uops)
Translate a list of operations down to micro-ops, which can be further optimized and then directly ex...
Definition: uops.c:850
ret
ret
Definition: filter_design.txt:187
BPRINT_STR
#define BPRINT_STR(str)
SwsUOpList::num_ops
int num_ops
Definition: uops.h:237
SWS_OP_MAX
@ SWS_OP_MAX
Definition: ops.h:53
SwsCompiledOp
Definition: ops_dispatch.h:100
ff_sws_uop_list_free
void ff_sws_uop_list_free(SwsUOpList **p_ops)
Definition: uops.c:368
av_bprintf
void av_bprintf(AVBPrint *buf, const char *fmt,...)
Definition: bprint.c:122
SwsUOp::ptr
SwsPixel * ptr
Definition: uops.h:211
macro
char macro[32]
Definition: uops.c:46
SwsComps
Definition: ops.h:79
AVRational::den
int den
Denominator.
Definition: rational.h:60
AV_PIX_FMT_NONE
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:72
ff_sws_pixel_type_name
const char * ff_sws_pixel_type_name(SwsPixelType type)
Definition: ops.c:62
SwsLinearUOp::exact
uint32_t exact
Definition: uops.h:176
SWS_OP_SWAP_BYTES
@ SWS_OP_SWAP_BYTES
Definition: ops.h:40
ff_sws_uop_name
void ff_sws_uop_name(const SwsUOp *op, char buf[SWS_UOP_NAME_MAX])
Definition: uops.c:129
CLEAR
#define CLEAR(destin)
Definition: wavpackenc.c:50
SwsDitherUOp::y_offset
uint8_t y_offset[4]
Definition: uops.h:180
ref
static int ref[MAX_W *MAX_W]
Definition: jpeg2000dwt.c:117
px
#define px
Definition: uops_tmpl.c:54
SwsUOpList
Definition: uops.h:235
SwsUOp::vec4
SwsPixel vec4[4]
Definition: uops.h:213
ff_sws_uop_list_append
int ff_sws_uop_list_append(SwsUOpList *uops, SwsUOp *uop)
Definition: uops.c:387
ff_sws_compile_pass
int ff_sws_compile_pass(SwsGraph *graph, const SwsOpBackend *backend, SwsOpList **pops, int flags, SwsPass *input, SwsPass **output)
Resolves an operation list to a graph pass.
Definition: ops_dispatch.c:616
generate_entry_args
static int generate_entry_args(void *opaque, void *key)
Definition: uops.c:289
SWS_FILTER_SCALE
@ SWS_FILTER_SCALE
14-bit coefficients are picked to fit comfortably within int16_t for efficient SIMD processing (e....
Definition: filters.h:40
SWS_UOP_DITHER
@ SWS_UOP_DITHER
Definition: uops.h:134
SWS_UOP_WRITE_PACKED
@ SWS_UOP_WRITE_PACKED
Definition: uops.h:101
SwsDitherUOp
Definition: uops.h:179
SwsUOpParams::dither
SwsDitherUOp dither
Definition: uops.h:198
mem.h
SwsGraph
Filter graph, which represents a 'baked' pixel format conversion.
Definition: graph.h:122
SWS_PIXEL_F32
@ SWS_PIXEL_F32
Definition: uops.h:43
SWS_UOP_READ_PLANAR_FV
@ SWS_UOP_READ_PLANAR_FV
Definition: uops.h:94
av_free
#define av_free(p)
Definition: tableprint_vlc.h:34
uops.h
SWS_UOP_EXPAND_QUAD
@ SWS_UOP_EXPAND_QUAD
Definition: uops.h:114
SwsUOpFlags
uint32_t SwsUOpFlags
Definition: uops.h:81
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:35
SWS_OP_CONVERT
@ SWS_OP_CONVERT
Definition: ops.h:51
SWS_UOP_READ_PLANAR_FH
@ SWS_UOP_READ_PLANAR_FH
Definition: uops.h:93
sws_free_context
void sws_free_context(SwsContext **ctx)
Free the context and everything associated with it, and write NULL to the provided pointer.
Definition: utils.c:2381
SwsMoveUOp::src
int8_t src[SWS_UOP_MOVE_MAX]
Definition: uops.h:159
SwsUOpParams::filter
SwsFilterUOp filter
Definition: uops.h:191
translate_linear_op
static int translate_linear_op(SwsContext *ctx, SwsUOpList *ops, SwsUOpFlags flags, const SwsOp *op, const SwsComps *input)
Definition: uops.c:694
SWS_UOP_FLAG_FMA
@ SWS_UOP_FLAG_FMA
Definition: uops.h:84
ff_sws_dither_height
int ff_sws_dither_height(const SwsDitherUOp *dither)
Computes (1 << size_log2) + MAX(y_offset).
Definition: uops.c:400
translate_op
static int translate_op(SwsContext *ctx, SwsUOpList *uops, SwsUOpFlags flags, const SwsOp *op, const SwsComps *input)
Definition: uops.c:749
SWS_ACCURATE_RND
@ SWS_ACCURATE_RND
Force bit-exact output.
Definition: swscale.h:179
av_bprint_chars
void av_bprint_chars(AVBPrint *buf, char c, unsigned n)
Append char c n times to a print buffer.
Definition: bprint.c:130
SWS_UOP_READ_BIT
@ SWS_UOP_READ_BIT
Definition: uops.h:98
stride
#define stride
Definition: h264pred_template.c:536
SWS_UOP_CLEAR
@ SWS_UOP_CLEAR
Definition: uops.h:131
SwsOpList
Helper struct for representing a list of operations.
Definition: ops.h:258
SwsContext
Main external API structure.
Definition: swscale.h:229
SWS_PIXEL_U16
@ SWS_PIXEL_U16
Definition: uops.h:41
SwsSwizzleUOp::in
uint8_t in[4]
Definition: uops.h:149
SwsUOpParams::clear
SwsClearUOp clear
Definition: uops.h:196
SwsUOpList::ops
SwsUOp * ops
Definition: uops.h:236
src
#define src
Definition: vp8dsp.c:248
SWS_UOP_EXPAND_PAIR
@ SWS_UOP_EXPAND_PAIR
Definition: uops.h:113
register_uops
static int register_uops(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out)
Definition: uops.c:911
min
float min
Definition: vorbis_enc_data.h:429