FFmpeg
ops_dispatch.c
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2025 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/avassert.h"
22 #include "libavutil/cpu.h"
23 #include "libavutil/mathematics.h"
24 #include "libavutil/mem.h"
25 #include "libavutil/mem_internal.h"
26 #include "libavutil/refstruct.h"
27 
28 #include "ops.h"
29 #include "ops_internal.h"
30 #include "ops_dispatch.h"
31 
/*
 * Private per-pass state for a compiled operation list.
 *
 * NOTE(review): the embedded listing numbers jump (32 -> 36 -> 41 -> 45 and
 * 47 -> 51), so several members are not shown here even though the functions
 * in this file use them (e.g. comp, exec_base, exec_tail, planes_out,
 * pixel_bits_in, pixel_bits_out, filter_size, memcpy_first, memcpy_last and
 * the tail_off_* / tail_size_* members). Restore them from the upstream file
 * before relying on this definition.
 */
32 typedef struct SwsOpPass {
36  size_t num_blocks; /* blocks per line: pass width divided by block size, rounded up */
41  int planes_in; /* number of input planes, from rw_planes() of the read op */
45  int idx_in[4]; /* frame plane index for each input plane, -1 if unused */
46  int idx_out[4]; /* frame plane index for each output plane, -1 if unused */
47  int *offsets_y; /* optional output line -> input line map (vertical filter) */
51  bool memcpy_out; /* set when some output plane cannot hold all blocks in place */
52  size_t tail_blocks; /* trailing blocks per line handled via the tail path */
53  uint8_t *tail_buf; /* extra memory for fixing unpadded tails */
54  unsigned int tail_buf_size; /* allocation size tracked for av_fast_mallocz() */
55 } SwsOpPass;
56 
/*
 * Try to compile `ops` with one specific backend.
 *
 * On success the compiled result is stored in *out and a verbose log line
 * reports the backend's block size, over-read/over-write and CPU flags.
 * On failure a negative AVERROR is returned; ENOTSUP is logged at trace
 * level only, any other error at error level.
 *
 * NOTE(review): listing lines 64, 69, 86 and 89 are missing from this view.
 * Presumably they assign `copy` (a duplicate of `ops`), prepare it for
 * compilation and release it again -- confirm against the upstream file.
 */
57 static int compile_backend(SwsContext *ctx, const SwsOpBackend *backend,
58  const SwsOpList *ops, SwsCompiledOp *out)
59 {
60  SwsOpList *copy;
61  SwsCompiledOp compiled = {0};
62  int ret = 0;
63 
65  if (!copy)
66  return AVERROR(ENOMEM);
67 
68  /* Ensure these are always set during compilation */
70 
71  ret = backend->compile(ctx, copy, &compiled);
72  if (ret < 0) {
        /* An unsupported op list is an expected outcome, so keep it quiet */
73  int msg_lev = ret == AVERROR(ENOTSUP) ? AV_LOG_TRACE : AV_LOG_ERROR;
74  av_log(ctx, msg_lev, "Backend '%s' failed to compile operations: %s\n",
75  backend->name, av_err2str(ret));
76  goto fail;
77  }
78 
        /* Only publish the result once compilation has fully succeeded */
79  *out = compiled;
80 
81  av_log(ctx, AV_LOG_VERBOSE, "Compiled using backend '%s': "
82  "block size = %d, over-read = %d, over-write = %d, cpu flags = 0x%x\n",
83  backend->name, out->block_size, out->over_read, out->over_write,
84  out->cpu_flags);
85 
87 
88 fail:
90  return ret;
91 }
92 
94  const SwsOpList *ops, SwsCompiledOp *out)
95 {
        /*
         * Compile `ops` into *out.
         *
         * With an explicit `backend` only that backend is tried and its result
         * is returned as-is. Otherwise every entry of the NULL-terminated
         * ff_sws_op_backends[] table is tried in order; the first one that
         * compiles wins. Returns AVERROR(ENOTSUP) when no backend succeeds.
         *
         * NOTE(review): the first line of the signature (listing line 93) is
         * missing from this view.
         */
96  if (backend)
97  return compile_backend(ctx, backend, ops, out);
98 
99  for (int n = 0; ff_sws_op_backends[n]; n++) {
100  const SwsOpBackend *backend = ff_sws_op_backends[n];
        /* Skip backends whose hw_format differs from either end of the list */
101  if (ops->src.hw_format != backend->hw_format ||
102  ops->dst.hw_format != backend->hw_format)
103  continue;
        /* Any failure (not just ENOTSUP) moves on to the next backend */
104  if (compile_backend(ctx, backend, ops, out) < 0)
105  continue;
106 
107  return 0;
108  }
109 
110  return AVERROR(ENOTSUP);
111 }
112 
114 {
        /*
         * Release a compiled operation: invoke its `free` callback (if any) on
         * its private data, then reset the whole struct to zero so that a
         * repeated call does nothing further.
         *
         * NOTE(review): the signature (listing line 113) is missing from this
         * view.
         */
115  if (comp->free)
116  comp->free(comp->priv);
117 
118  *comp = (SwsCompiledOp) {0};
119 }
120 
121 static void op_pass_free(void *ptr)
122 {
123  SwsOpPass *p = ptr;
124  if (!p)
125  return;
126 
127  ff_sws_compiled_op_unref(&p->comp);
128  av_refstruct_unref(&p->offsets_y);
129  av_free(p->exec_base.in_bump_y);
130  av_free(p->exec_base.in_offset_x);
131  av_free(p->tail_buf);
132  av_free(p);
133 }
134 
135 static inline void get_row_data(const SwsOpPass *p, const int y_dst,
136  const uint8_t *in[4], uint8_t *out[4])
137 {
138  const SwsOpExec *base = &p->exec_base;
139  const int y_src = p->offsets_y ? p->offsets_y[y_dst] : y_dst;
140  for (int i = 0; i < p->planes_in; i++)
141  in[i] = base->in[i] + (y_src >> base->in_sub_y[i]) * base->in_stride[i];
142  for (int i = 0; i < p->planes_out; i++)
143  out[i] = base->out[i] + (y_dst >> base->out_sub_y[i]) * base->out_stride[i];
144 }
145 
146 static inline size_t pixel_bytes(size_t pixels, int pixel_bits,
147  enum AVRounding rounding)
148 {
149  const uint64_t bits = (uint64_t) pixels * pixel_bits;
150  switch (rounding) {
151  case AV_ROUND_ZERO:
152  case AV_ROUND_DOWN:
153  return bits >> 3;
154  case AV_ROUND_INF:
155  case AV_ROUND_UP:
156  return (bits + 7) >> 3;
157  default:
158  av_unreachable("Invalid rounding mode");
159  return (size_t) -1;
160  }
161 }
162 
/**
 * Number of bytes at the start of a line that can be processed while still
 * leaving `plane_pad` bytes of slack before the end of the line.
 *
 * The sign of `linesize` is ignored; the result is clamped to zero when the
 * padding exceeds the line size.
 *
 * @param linesize  line size in bytes, may be negative, must be non-zero
 * @param plane_pad bytes of over-read / over-write to reserve
 */
static size_t safe_bytes_pad(int linesize, int plane_pad)
{
    av_assert1(linesize);
    /* Widen before negating so that INT_MIN cannot overflow */
    int64_t safe_bytes = FFABS((int64_t) linesize) - plane_pad;
    return FFMAX(safe_bytes, 0);
}
169 
/**
 * Count how many leading blocks can be processed without any of their
 * pixels reading past `safe_offset`.
 *
 * Starting from `num_blocks`, blocks are dropped from the end for as long as
 * the byte offset of the last pixel of the last remaining block exceeds
 * `safe_offset`.
 *
 * @param num_blocks   total number of blocks in a line
 * @param block_size   pixels per block
 * @param safe_offset  largest byte offset that may still be used
 * @param offset_bytes per-pixel byte offsets, at least
 *                     `num_blocks * block_size` entries
 * @return number of leading blocks that are safe, possibly 0
 */
static size_t safe_blocks_offset(size_t num_blocks, unsigned block_size,
                                 ptrdiff_t safe_offset,
                                 const int32_t *offset_bytes)
{
    size_t safe_blocks = num_blocks;
    while (safe_blocks && offset_bytes[safe_blocks * block_size - 1] > safe_offset)
        safe_blocks--;
    return safe_blocks;
}
179 
180 static int op_pass_setup(const SwsFrame *out, const SwsFrame *in,
181  const SwsPass *pass)
182 {
183  const AVPixFmtDescriptor *indesc = av_pix_fmt_desc_get(in->format);
184  const AVPixFmtDescriptor *outdesc = av_pix_fmt_desc_get(out->format);
185 
186  SwsOpPass *p = pass->priv;
187  SwsOpExec *exec = &p->exec_base;
188  const SwsCompiledOp *comp = &p->comp;
189 
190  /* Set up main loop parameters */
191  const unsigned block_size = comp->block_size;
192  const size_t num_blocks = (pass->width + block_size - 1) / block_size;
193  const size_t aligned_w = num_blocks * block_size;
194  if (aligned_w < pass->width) /* overflow */
195  return AVERROR(EINVAL);
196  p->num_blocks = num_blocks;
197  p->memcpy_first = false;
198  p->memcpy_last = false;
199  p->memcpy_out = false;
200 
201  size_t safe_blocks = num_blocks;
202  for (int i = 0; i < p->planes_in; i++) {
203  int idx = p->idx_in[i];
204  int chroma = idx == 1 || idx == 2;
205  int sub_x = chroma ? indesc->log2_chroma_w : 0;
206  int sub_y = chroma ? indesc->log2_chroma_h : 0;
207  size_t safe_bytes = safe_bytes_pad(in->linesize[idx], comp->over_read);
208  size_t safe_blocks_in;
209  if (exec->in_offset_x) {
210  size_t filter_size = pixel_bytes(p->filter_size, p->pixel_bits_in,
211  AV_ROUND_UP);
212  safe_blocks_in = safe_blocks_offset(num_blocks, block_size,
213  safe_bytes - filter_size,
214  exec->in_offset_x);
215  } else {
216  safe_blocks_in = safe_bytes / exec->block_size_in;
217  }
218 
219  if (safe_blocks_in < num_blocks) {
220  p->memcpy_first |= in->linesize[idx] < 0;
221  p->memcpy_last |= in->linesize[idx] > 0;
222  safe_blocks = FFMIN(safe_blocks, safe_blocks_in);
223  }
224 
225  size_t loop_size = num_blocks * exec->block_size_in;
226  exec->in[i] = in->data[idx];
227  exec->in_stride[i] = in->linesize[idx];
228  exec->in_bump[i] = in->linesize[idx] - loop_size;
229  exec->in_sub_y[i] = sub_y;
230  exec->in_sub_x[i] = sub_x;
231  }
232 
233  for (int i = 0; i < p->planes_out; i++) {
234  int idx = p->idx_out[i];
235  int chroma = idx == 1 || idx == 2;
236  int sub_x = chroma ? outdesc->log2_chroma_w : 0;
237  int sub_y = chroma ? outdesc->log2_chroma_h : 0;
238  size_t safe_bytes = safe_bytes_pad(out->linesize[idx], comp->over_write);
239  size_t safe_blocks_out = safe_bytes / exec->block_size_out;
240  if (safe_blocks_out < num_blocks) {
241  p->memcpy_out = true;
242  safe_blocks = FFMIN(safe_blocks, safe_blocks_out);
243  }
244 
245  size_t loop_size = num_blocks * exec->block_size_out;
246  exec->out[i] = out->data[idx];
247  exec->out_stride[i] = out->linesize[idx];
248  exec->out_bump[i] = out->linesize[idx] - loop_size;
249  exec->out_sub_y[i] = sub_y;
250  exec->out_sub_x[i] = sub_x;
251  }
252 
253  const bool memcpy_in = p->memcpy_first || p->memcpy_last;
254  if (!memcpy_in && !p->memcpy_out) {
255  av_assert0(safe_blocks == num_blocks);
256  return 0;
257  }
258 
259  /* Set-up tail section parameters and buffers */
260  SwsOpExec *tail = &p->exec_tail;
261  const int align = av_cpu_max_align();
262  size_t alloc_size = 0;
263  *tail = *exec;
264 
265  const size_t safe_width = safe_blocks * block_size;
266  const size_t tail_size = pass->width - safe_width;
267  p->tail_off_out = pixel_bytes(safe_width, p->pixel_bits_out, AV_ROUND_DOWN);
268  p->tail_size_out = pixel_bytes(tail_size, p->pixel_bits_out, AV_ROUND_UP);
269  p->tail_blocks = num_blocks - safe_blocks;
270 
271  if (exec->in_offset_x) {
272  p->tail_off_in = exec->in_offset_x[safe_width];
273  p->tail_size_in = exec->in_offset_x[pass->width - 1] - p->tail_off_in;
274  p->tail_size_in += pixel_bytes(p->filter_size, p->pixel_bits_in, AV_ROUND_UP);
275  } else {
276  p->tail_off_in = pixel_bytes(safe_width, p->pixel_bits_in, AV_ROUND_DOWN);
277  p->tail_size_in = pixel_bytes(tail_size, p->pixel_bits_in, AV_ROUND_UP);
278  }
279 
280  const size_t alloc_width = aligned_w - safe_width;
281  for (int i = 0; memcpy_in && i < p->planes_in; i++) {
282  size_t needed_size;
283  if (exec->in_offset_x) {
284  /* The input offset map is already padded to multiples of the block
285  * size, and clamps the input offsets to the image boundaries; so
286  * we just need to compensate for the comp->over_read */
287  needed_size = p->tail_size_in;
288  } else {
289  needed_size = pixel_bytes(alloc_width, p->pixel_bits_in, AV_ROUND_UP);
290  }
291  size_t loop_size = p->tail_blocks * exec->block_size_in;
292  tail->in_stride[i] = FFALIGN(needed_size + comp->over_read, align);
293  tail->in_bump[i] = tail->in_stride[i] - loop_size;
294  alloc_size += tail->in_stride[i] * in->height;
295  }
296 
297  for (int i = 0; p->memcpy_out && i < p->planes_out; i++) {
298  size_t needed_size = pixel_bytes(alloc_width, p->pixel_bits_out, AV_ROUND_UP);
299  size_t loop_size = p->tail_blocks * exec->block_size_out;
300  tail->out_stride[i] = FFALIGN(needed_size + comp->over_write, align);
301  tail->out_bump[i] = tail->out_stride[i] - loop_size;
302  alloc_size += tail->out_stride[i] * out->height;
303  }
304 
305  if (memcpy_in && exec->in_offset_x) {
306  /* `in_offset_x` is indexed relative to the line start, not the start
307  * of the section being processed; so we need to over-allocate this
308  * array to the full width of the image, even though we will only
309  * partially fill in the offsets relevant to the tail region */
310  alloc_size += aligned_w * sizeof(*exec->in_offset_x);
311  }
312 
313  av_fast_mallocz(&p->tail_buf, &p->tail_buf_size, alloc_size);
314  if (!p->tail_buf)
315  return AVERROR(ENOMEM);
316 
317  uint8_t *tail_buf = p->tail_buf;
318  for (int i = 0; memcpy_in && i < p->planes_in; i++) {
319  tail->in[i] = tail_buf;
320  tail_buf += tail->in_stride[i] * in->height;
321  }
322 
323  for (int i = 0; p->memcpy_out && i < p->planes_out; i++) {
324  tail->out[i] = tail_buf;
325  tail_buf += tail->out_stride[i] * out->height;
326  }
327 
328  if (memcpy_in && exec->in_offset_x) {
329  tail->in_offset_x = (int32_t *) tail_buf;
330  for (int i = safe_width; i < aligned_w; i++)
331  tail->in_offset_x[i] = exec->in_offset_x[i] - p->tail_off_in;
332  }
333 
334  return 0;
335 }
336 
/**
 * Copy `h` rows of `bytes` bytes each between two strided buffers.
 *
 * Strides are signed so that bottom-up layouts (negative stride) are handled
 * with well-defined pointer arithmetic. Every row address is derived from
 * the base pointer, so no pointer beyond the last copied row is ever formed.
 *
 * @param dst        first destination row
 * @param dst_stride byte distance between destination rows, may be negative
 * @param src        first source row
 * @param src_stride byte distance between source rows, may be negative
 * @param h          number of rows; nothing is copied when h <= 0
 * @param bytes      bytes to copy per row; rows must not overlap
 */
static void copy_lines(uint8_t *dst, const ptrdiff_t dst_stride,
                       const uint8_t *src, const ptrdiff_t src_stride,
                       const int h, const size_t bytes)
{
    for (int y = 0; y < h; y++)
        memcpy(dst + y * dst_stride, src + y * src_stride, bytes);
}
347 
348 static void op_pass_run(const SwsFrame *out, const SwsFrame *in, const int y,
349  const int h, const SwsPass *pass)
350 {
351  const SwsOpPass *p = pass->priv;
352  const SwsCompiledOp *comp = &p->comp;
353 
354  /* Fill exec metadata for this slice */
355  DECLARE_ALIGNED_32(SwsOpExec, exec) = p->exec_base;
356  exec.slice_y = y;
357  exec.slice_h = h;
358 
359  /**
360  * To ensure safety, we need to consider the following:
361  *
362  * 1. We can overread the input, unless this is the last line of an
363  * unpadded buffer. All defined operations can handle arbitrary pixel
364  * input, so overread of arbitrary data is fine. For flipped images,
365  * this condition is actually *inverted* to where the first line is
366  * the one at the end of the buffer.
367  *
368  * 2. We can overwrite the output, as long as we don't write more than the
369  * amount of pixels that fit into one linesize. So we always need to
370  * memcpy the last column on the output side if unpadded.
371  */
372 
373  const bool memcpy_in = p->memcpy_last && y + h == pass->height ||
374  p->memcpy_first && y == 0;
375  const bool memcpy_out = p->memcpy_out;
376  const size_t num_blocks = p->num_blocks;
377  const size_t tail_blocks = p->tail_blocks;
378 
379  get_row_data(p, y, exec.in, exec.out);
380  if (!memcpy_in && !memcpy_out) {
381  /* Fast path (fully aligned/padded inputs and outputs) */
382  comp->func(&exec, comp->priv, 0, y, num_blocks, y + h);
383  return;
384  }
385 
386  /* Non-aligned case (slow path); process main blocks as normal, and
387  * a separate tail (via memcpy into an appropriately padded buffer) */
388  if (num_blocks > tail_blocks) {
389  for (int i = 0; i < 4; i++) {
390  /* We process fewer blocks, so the in_bump needs to be increased
391  * to reflect that the plane pointers are left on the last block,
392  * not the end of the processed line, after each loop iteration */
393  exec.in_bump[i] += exec.block_size_in * tail_blocks;
394  exec.out_bump[i] += exec.block_size_out * tail_blocks;
395  }
396 
397  comp->func(&exec, comp->priv, 0, y, num_blocks - tail_blocks, y + h);
398  }
399 
400  DECLARE_ALIGNED_32(SwsOpExec, tail) = p->exec_tail;
401  tail.slice_y = y;
402  tail.slice_h = h;
403 
404  for (int i = 0; i < p->planes_in; i++) {
405  /* Input offsets are relative to the base pointer */
406  if (!exec.in_offset_x || memcpy_in)
407  exec.in[i] += p->tail_off_in;
408  tail.in[i] += y * tail.in_stride[i];
409  }
410  for (int i = 0; i < p->planes_out; i++) {
411  exec.out[i] += p->tail_off_out;
412  tail.out[i] += y * tail.out_stride[i];
413  }
414 
415  for (int i = 0; i < p->planes_in; i++) {
416  if (memcpy_in) {
417  copy_lines((uint8_t *) tail.in[i], tail.in_stride[i],
418  exec.in[i], exec.in_stride[i], h, p->tail_size_in);
419  } else {
420  /* Reuse input pointers directly */
421  const size_t loop_size = tail_blocks * exec.block_size_in;
422  tail.in[i] = exec.in[i];
423  tail.in_stride[i] = exec.in_stride[i];
424  tail.in_bump[i] = exec.in_stride[i] - loop_size;
425  }
426  }
427 
428  for (int i = 0; !memcpy_out && i < p->planes_out; i++) {
429  /* Reuse output pointers directly */
430  const size_t loop_size = tail_blocks * exec.block_size_out;
431  tail.out[i] = exec.out[i];
432  tail.out_stride[i] = exec.out_stride[i];
433  tail.out_bump[i] = exec.out_stride[i] - loop_size;
434  }
435 
436  /* Dispatch kernel over tail */
437  av_assert1(tail_blocks > 0);
438  comp->func(&tail, comp->priv, num_blocks - tail_blocks, y, num_blocks, y + h);
439 
440  for (int i = 0; memcpy_out && i < p->planes_out; i++) {
441  copy_lines(exec.out[i], exec.out_stride[i],
442  tail.out[i], tail.out_stride[i], h, p->tail_size_out);
443  }
444 }
445 
446 static int rw_planes(const SwsOp *op)
447 {
448  return op->rw.packed ? 1 : op->rw.elems;
449 }
450 
451 static int rw_pixel_bits(const SwsOp *op)
452 {
453  const int elems = op->rw.packed ? op->rw.elems : 1;
454  const int size = ff_sws_pixel_type_size(op->type);
455  const int bits = 8 >> op->rw.frac;
456  av_assert1(bits >= 1);
457  return elems * size * bits;
458 }
459 
460 static void align_pass(SwsPass *pass, int block_size, int over_rw, int pixel_bits)
461 {
462  if (!pass)
463  return;
464 
465  /* Add at least as many pixels as needed to cover the padding requirement */
466  const int pad = (over_rw * 8 + pixel_bits - 1) / pixel_bits;
467 
468  SwsPassBuffer *buf = pass->output;
469  buf->width_align = FFMAX(buf->width_align, block_size);
470  buf->width_pad = FFMAX(buf->width_pad, pad);
471 }
472 
/*
 * Compile `ops` and, if `output` is non-NULL, append the resulting pass to
 * `graph`.
 *
 * With a NULL `output` the list is only test-compiled: the result of
 * ff_sws_ops_compile() is returned and the temporary state is freed.
 * Returns 0 on success or a negative AVERROR; every `goto fail` path frees
 * the SwsOpPass allocated here.
 *
 * NOTE(review): listing line 572 (the remaining arguments of the final
 * ff_sws_graph_add_pass() call) is missing from this view. The early
 * `return ret` after that call does not free `p`; presumably the graph has
 * taken ownership of it by then -- confirm against ff_sws_graph_add_pass().
 */
473 static int compile(SwsGraph *graph, const SwsOpBackend *backend,
474  const SwsOpList *ops, SwsPass *input, SwsPass **output)
475 {
476  SwsContext *ctx = graph->ctx;
477  SwsOpPass *p = av_mallocz(sizeof(*p));
478  if (!p)
479  return AVERROR(ENOMEM);
480 
481  int ret = ff_sws_ops_compile(ctx, backend, ops, &p->comp);
482  if (ret < 0)
483  goto fail;
484  else if (!output)
485  goto fail; /* nothing to do, just return */
486 
487  const SwsCompiledOp *comp = &p->comp;
488  const SwsFormat *dst = &ops->dst;
489  if (p->comp.opaque) {
        /* Opaque kernels bypass the dispatch layer: hand the compiled
         * function and its private data directly to the graph, and free
         * only the wrapper (the copy `c` keeps priv/free alive) */
490  SwsCompiledOp c = *comp;
491  av_free(p);
492  return ff_sws_graph_add_pass(graph, dst->format, dst->width, dst->height,
493  input, c.slice_align, c.func_opaque,
494  NULL, c.priv, c.free, output);
495  }
496 
497  const SwsOp *read = ff_sws_op_list_input(ops);
498  const SwsOp *write = ff_sws_op_list_output(ops);
499  p->planes_in = rw_planes(read);
500  p->planes_out = rw_planes(write);
501  p->pixel_bits_in = rw_pixel_bits(read);
502  p->pixel_bits_out = rw_pixel_bits(write);
503  p->exec_base = (SwsOpExec) {
504  .width = dst->width,
505  .height = dst->height,
506  };
507 
        /* A block must span a whole number of bytes on both sides */
508  const int64_t block_bits_in = (int64_t) comp->block_size * p->pixel_bits_in;
509  const int64_t block_bits_out = (int64_t) comp->block_size * p->pixel_bits_out;
510  if (block_bits_in & 0x7 || block_bits_out & 0x7) {
511  av_log(ctx, AV_LOG_ERROR, "Block size must be a multiple of the pixel size.\n");
512  ret = AVERROR(EINVAL);
513  goto fail;
514  }
515 
516  p->exec_base.block_size_in = block_bits_in >> 3;
517  p->exec_base.block_size_out = block_bits_out >> 3;
518 
        /* Map kernel planes to frame planes; unused slots are marked -1 */
519  for (int i = 0; i < 4; i++) {
520  p->idx_in[i] = i < p->planes_in ? ops->plane_src[i] : -1;
521  p->idx_out[i] = i < p->planes_out ? ops->plane_dst[i] : -1;
522  }
523 
524  const SwsFilterWeights *filter = read->rw.kernel;
525  if (read->rw.filter == SWS_OP_FILTER_V) {
526  p->offsets_y = av_refstruct_ref(filter->offsets);
527 
528  /* Compute relative pointer bumps for each output line */
529  int32_t *bump = av_malloc_array(filter->dst_size, sizeof(*bump));
530  if (!bump) {
531  ret = AVERROR(ENOMEM);
532  goto fail;
533  }
534 
535  int line = filter->offsets[0];
536  for (int y = 0; y < filter->dst_size - 1; y++) {
537  int next = filter->offsets[y + 1];
        /* Extra lines to skip beyond the usual advance of one */
538  bump[y] = next - line - 1;
539  line = next;
540  }
541  bump[filter->dst_size - 1] = 0;
542  p->exec_base.in_bump_y = bump;
543  } else if (read->rw.filter == SWS_OP_FILTER_H) {
544  /* Compute pixel offset map for each output line */
545  const int pixels = FFALIGN(filter->dst_size, p->comp.block_size);
546  int32_t *offset = av_malloc_array(pixels, sizeof(*offset));
547  if (!offset) {
548  ret = AVERROR(ENOMEM);
549  goto fail;
550  }
        /* Stored before filling so that the fail path frees it */
551  p->exec_base.in_offset_x = offset;
552 
553  for (int x = 0; x < filter->dst_size; x++) {
554  /* Sanity check; if the tap would land on a half-pixel, we cannot
555  * reasonably expect the implementation to know about this. Just
556  * error out in such (theoretical) cases. */
557  int64_t bits = (int64_t) filter->offsets[x] * p->pixel_bits_in;
558  if ((bits & 0x7) || (bits >> 3) > INT32_MAX) {
559  ret = AVERROR(EINVAL);
560  goto fail;
561  }
562  offset[x] = bits >> 3;
563  }
        /* Pad up to the block-aligned width by repeating the last offset */
564  for (int x = filter->dst_size; x < pixels; x++)
565  offset[x] = offset[filter->dst_size - 1];
566  p->exec_base.block_size_in = 0; /* ptr does not advance */
567  p->filter_size = filter->filter_size;
568  }
569 
570  ret = ff_sws_graph_add_pass(graph, dst->format, dst->width, dst->height,
571  input, comp->slice_align, op_pass_run,
573  if (ret < 0)
574  return ret;
575 
        /* Propagate block alignment and padding needs to both neighbours */
576  align_pass(input, comp->block_size, comp->over_read, p->pixel_bits_in);
577  align_pass(*output, comp->block_size, comp->over_write, p->pixel_bits_out);
578  return 0;
579 
580 fail:
581  op_pass_free(p);
582  return ret;
583 }
584 
/*
 * Compile an operation list into one or more passes appended to `graph`.
 *
 * A list that is an end-to-end no-op adds nothing and forwards `input` as
 * the output. Otherwise the list is compiled as a single pass; if that
 * fails with ENOTSUP it is split into sub-passes via
 * ff_sws_op_list_subpass() and each piece is compiled in turn.
 *
 * The list in *pops is always consumed: it is freed and *pops is set to NULL
 * on every path. On failure the graph is rolled back to the number of
 * passes it had on entry.
 *
 * Returns 0 on success or a negative AVERROR.
 *
 * NOTE(review): listing lines 611, 615 and 658 are missing from this view;
 * presumably they run the optimizer and print the operation list -- confirm
 * against the upstream file.
 */
585 int ff_sws_compile_pass(SwsGraph *graph, const SwsOpBackend *backend,
586  SwsOpList **pops, int flags, SwsPass *input,
587  SwsPass **output)
588 {
589  const int passes_orig = graph->num_passes;
590  SwsContext *ctx = graph->ctx;
591  SwsOpList *ops = *pops;
592  int ret = 0;
593 
594  /* Check if the whole operation graph is an end-to-end no-op */
595  if (ff_sws_op_list_is_noop(ops)) {
596  if (output)
597  *output = input;
598  goto out;
599  }
600 
601  const SwsOp *read = ff_sws_op_list_input(ops);
602  const SwsOp *write = ff_sws_op_list_output(ops);
603  if (!read || !write) {
604  av_log(ctx, AV_LOG_ERROR, "First and last operations must be a read "
605  "and write, respectively.\n");
606  ret = AVERROR(EINVAL);
607  goto out;
608  }
609 
610  if (flags & SWS_OP_FLAG_OPTIMIZE) {
612  if (ret < 0)
613  goto out;
614  av_log(ctx, AV_LOG_DEBUG, "Operation list after optimizing:\n");
616  }
617 
        /* First attempt: everything in one pass. Only ENOTSUP falls through */
618  ret = compile(graph, backend, ops, input, output);
619  if (ret != AVERROR(ENOTSUP))
620  goto out;
621 
622  av_log(ctx, AV_LOG_DEBUG, "Retrying with separated filter passes.\n");
623  SwsPass *prev = input;
624  bool first = true;
625  while (ops) {
626  SwsOpList *rest;
627  ret = ff_sws_op_list_subpass(ops, &rest);
628  if (ret < 0)
629  goto out;
630 
631  if (first && !rest) {
632  /* No point in compiling an unsplit pass again */
633  ret = AVERROR(ENOTSUP);
634  goto out;
635  }
636 
        /* Chain each sub-pass onto the previous one (test-compile if !output) */
637  ret = compile(graph, backend, ops, prev, output ? &prev : NULL);
638  if (ret < 0) {
        /* `ops` itself is released at `out`; only `rest` needs freeing here */
639  ff_sws_op_list_free(&rest);
640  goto out;
641  }
642 
643  ff_sws_op_list_free(&ops);
644  first = false;
645  ops = rest;
646  }
647 
648  if (output) {
649  /* Return last subpass successfully compiled */
650  av_log(ctx, AV_LOG_VERBOSE, "Using %d separate passes.\n",
651  graph->num_passes - passes_orig);
652  *output = prev;
653  }
654 
655 out:
656  if (ret == AVERROR(ENOTSUP)) {
657  av_log(ctx, AV_LOG_WARNING, "No backend found for operations:\n");
659  }
        /* Undo any passes added before the failure */
660  if (ret < 0)
661  ff_sws_graph_rollback(graph, passes_orig);
662  ff_sws_op_list_free(&ops);
663  *pops = NULL;
664  return ret;
665 }
flags
const SwsFlags flags[]
Definition: swscale.c:72
SwsOpPass::tail_buf
uint8_t * tail_buf
Definition: ops_dispatch.c:53
copy_lines
static void copy_lines(uint8_t *dst, const size_t dst_stride, const uint8_t *src, const size_t src_stride, const int h, const size_t bytes)
Definition: ops_dispatch.c:337
AV_ROUND_UP
@ AV_ROUND_UP
Round toward +infinity.
Definition: mathematics.h:134
SwsOpPass::filter_size
int filter_size
Definition: ops_dispatch.c:48
SwsOpPass::tail_buf_size
unsigned int tail_buf_size
Definition: ops_dispatch.c:54
rw_planes
static int rw_planes(const SwsOp *op)
Definition: ops_dispatch.c:446
ff_sws_op_list_free
void ff_sws_op_list_free(SwsOpList **p_ops)
Definition: ops.c:620
AV_LOG_WARNING
#define AV_LOG_WARNING
Something somehow does not look correct.
Definition: log.h:216
SwsGraph::ctx
SwsContext * ctx
Definition: graph.h:122
SwsPass
Represents a single filter pass in the scaling graph.
Definition: graph.h:75
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
SwsOpPass::idx_in
int idx_in[4]
Definition: ops_dispatch.c:45
SwsOpPass::tail_size_out
int tail_size_out
Definition: ops_dispatch.c:40
ff_sws_op_list_duplicate
SwsOpList * ff_sws_op_list_duplicate(const SwsOpList *ops)
Returns a duplicate of ops, or NULL on OOM.
Definition: ops.c:634
mem_internal.h
out
static FILE * out
Definition: movenc.c:55
SwsOpPass::exec_tail
SwsOpExec exec_tail
Definition: ops_dispatch.c:35
comp
static void comp(unsigned char *dst, ptrdiff_t dst_stride, unsigned char *src, ptrdiff_t src_stride, int add)
Definition: eamad.c:79
SwsOpExec::in_bump
ptrdiff_t in_bump[4]
Pointer bump, difference between stride and processed line size.
Definition: ops_dispatch.h:51
ff_sws_op_list_input
const SwsOp * ff_sws_op_list_input(const SwsOpList *ops)
Returns the input operation for a given op list, or NULL if there is none (e.g.
Definition: ops.c:671
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:3456
SwsOpExec::out_stride
ptrdiff_t out_stride[4]
Definition: ops_dispatch.h:42
SwsOpExec::in
const uint8_t * in[4]
Definition: ops_dispatch.h:37
int64_t
long long int64_t
Definition: coverity.c:34
output
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
Definition: filter_design.txt:226
ops.h
SwsFilterWeights
Represents a computed filter kernel.
Definition: filters.h:64
SwsOpExec::block_size_in
int32_t block_size_in
Definition: ops_dispatch.h:57
chroma
static av_always_inline void chroma(WaveformContext *s, AVFrame *in, AVFrame *out, int component, int intensity, int offset_y, int offset_x, int column, int mirror, int jobnr, int nb_jobs)
Definition: vf_waveform.c:1639
AV_ROUND_ZERO
@ AV_ROUND_ZERO
Round toward zero.
Definition: mathematics.h:131
AVRounding
AVRounding
Rounding methods.
Definition: mathematics.h:130
AV_LOG_VERBOSE
#define AV_LOG_VERBOSE
Detailed information.
Definition: log.h:226
base
uint8_t base
Definition: vp3data.h:128
filter
void(* filter)(uint8_t *src, int stride, int qscale)
Definition: h263dsp.c:29
mathematics.h
ops_dispatch.h
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
SwsOpExec::in_stride
ptrdiff_t in_stride[4]
Definition: ops_dispatch.h:41
SwsOpPass::tail_blocks
size_t tail_blocks
Definition: ops_dispatch.c:52
SwsOpBackend::name
const char * name
Definition: ops_dispatch.h:131
SwsOpPass::idx_out
int idx_out[4]
Definition: ops_dispatch.c:46
ff_sws_pixel_type_size
int ff_sws_pixel_type_size(SwsPixelType type)
Definition: ops.c:77
SwsPass::width
int width
Definition: graph.h:85
ff_sws_op_list_subpass
int ff_sws_op_list_subpass(SwsOpList *ops, SwsOpList **out_rest)
Eliminate SWS_OP_FILTER_* operations by merging them with prior SWS_OP_READ operations.
Definition: ops_optimizer.c:937
SwsOpList::plane_dst
uint8_t plane_dst[4]
Definition: ops.h:296
ff_sws_op_list_print
void ff_sws_op_list_print(void *log, int lev, int lev_extra, const SwsOpList *ops)
Print out the contents of an operation list.
Definition: ops.c:961
ff_sws_op_backends
const SwsOpBackend *const ff_sws_op_backends[]
Definition: ops.c:45
SwsFrame::data
uint8_t * data[4]
Definition: format.h:212
fail
#define fail()
Definition: checkasm.h:225
SwsOpBackend::compile
int(* compile)(SwsContext *ctx, SwsOpList *ops, SwsCompiledOp *out)
Compile an operation list to an implementation chain.
Definition: ops_dispatch.h:139
SwsOpBackend::hw_format
enum AVPixelFormat hw_format
If NONE, backend only supports software frames.
Definition: ops_dispatch.h:146
SwsOpPass::memcpy_last
bool memcpy_last
Definition: ops_dispatch.c:50
refstruct.h
get_row_data
static void get_row_data(const SwsOpPass *p, const int y_dst, const uint8_t *in[4], uint8_t *out[4])
Definition: ops_dispatch.c:135
safe_blocks_offset
static size_t safe_blocks_offset(size_t num_blocks, unsigned block_size, ptrdiff_t safe_offset, const int32_t *offset_bytes)
Definition: ops_dispatch.c:170
SwsFrame
Represents a view into a single field of frame data.
Definition: format.h:210
first
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But first
Definition: rate_distortion.txt:12
avassert.h
AV_LOG_TRACE
#define AV_LOG_TRACE
Extremely verbose debugging, useful for libav* development.
Definition: log.h:236
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:210
SwsFrame::format
enum AVPixelFormat format
Definition: format.h:219
SwsPass::priv
void * priv
Definition: graph.h:110
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
bits
uint8_t bits
Definition: vp3data.h:128
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:42
AV_LOG_DEBUG
#define AV_LOG_DEBUG
Stuff which is only useful for libav* developers.
Definition: log.h:231
SwsGraph::num_passes
int num_passes
Definition: graph.h:132
ctx
static AVFormatContext * ctx
Definition: movenc.c:49
AVPixFmtDescriptor::log2_chroma_w
uint8_t log2_chroma_w
Amount to shift the luma width right to find the chroma width.
Definition: pixdesc.h:80
ff_sws_op_list_output
const SwsOp * ff_sws_op_list_output(const SwsOpList *ops)
Returns the output operation for a given op list, or NULL if there is none.
Definition: ops.c:680
SWS_OP_FILTER_H
@ SWS_OP_FILTER_H
Definition: ops.h:73
av_mallocz
#define av_mallocz(s)
Definition: tableprint_vlc.h:31
SwsOpPass::comp
SwsCompiledOp comp
Definition: ops_dispatch.c:33
SwsOpBackend
Definition: ops_dispatch.h:130
FFABS
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:74
if
if(ret)
Definition: filter_design.txt:179
SwsOpExec
Copyright (C) 2026 Niklas Haas.
Definition: ops_dispatch.h:35
ff_sws_op_list_is_noop
bool ff_sws_op_list_is_noop(const SwsOpList *ops)
Returns whether an op list represents a true no-op operation, i.e.
Definition: ops.c:719
op_pass_free
static void op_pass_free(void *ptr)
Definition: ops_dispatch.c:121
NULL
#define NULL
Definition: coverity.c:32
ff_sws_compiled_op_unref
void ff_sws_compiled_op_unref(SwsCompiledOp *comp)
Definition: ops_dispatch.c:113
av_unreachable
#define av_unreachable(msg)
Asserts that are used as compiler optimization hints depending upon ASSERT_LEVEL and NBDEBUG.
Definition: avassert.h:116
av_fast_mallocz
void av_fast_mallocz(void *ptr, unsigned int *size, size_t min_size)
Allocate and clear a buffer, reusing the given one if large enough.
Definition: mem.c:562
SWS_OP_FILTER_V
@ SWS_OP_FILTER_V
Definition: ops.h:74
av_cpu_max_align
size_t av_cpu_max_align(void)
Get the maximum data alignment that may be required by FFmpeg.
Definition: cpu.c:287
rw_pixel_bits
static int rw_pixel_bits(const SwsOp *op)
Definition: ops_dispatch.c:451
compile
static int compile(SwsGraph *graph, const SwsOpBackend *backend, const SwsOpList *ops, SwsPass *input, SwsPass **output)
Definition: ops_dispatch.c:473
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
AV_ROUND_DOWN
@ AV_ROUND_DOWN
Round toward -infinity.
Definition: mathematics.h:133
SwsPass::height
int height
Definition: graph.h:85
SwsOpExec::block_size_out
int32_t block_size_out
Definition: ops_dispatch.h:58
copy
static void copy(const float *p1, float *p2, const int length)
Definition: vf_vaguedenoiser.c:186
SwsFrame::height
int height
Definition: format.h:218
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:87
SwsOpExec::in_sub_x
uint8_t in_sub_x[4]
Definition: ops_dispatch.h:62
cpu.h
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
av_err2str
#define av_err2str(errnum)
Convenience macro; the return value should be used only directly in function arguments, never stand-alone.
Definition: error.h:122
size
int size
Definition: twinvq_data.h:10344
op_pass_setup
static int op_pass_setup(const SwsFrame *out, const SwsFrame *in, const SwsPass *pass)
Definition: ops_dispatch.c:180
SwsOpPass::offsets_y
int * offsets_y
Definition: ops_dispatch.c:47
SwsOpList::src
SwsFormat src
Definition: ops.h:293
ff_sws_op_list_update_comps
void ff_sws_op_list_update_comps(SwsOpList *ops)
Infer + propagate known information about components.
Definition: ops.c:341
compile_backend
static int compile_backend(SwsContext *ctx, const SwsOpBackend *backend, const SwsOpList *ops, SwsCompiledOp *out)
Definition: ops_dispatch.c:57
SwsFormat
Definition: format.h:77
align
static const uint8_t *BS_FUNC() align(BSCTX *bc)
Skip bits to a byte boundary.
Definition: bitstream_template.h:419
av_refstruct_ref
void * av_refstruct_ref(void *obj)
Create a new reference to an object managed via this API, i.e. increment the reference count of the underlying object and return obj.
Definition: refstruct.c:140
SwsPass::output
SwsPassBuffer * output
Filter output buffer.
Definition: graph.h:98
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
line
Definition: graph2dot.c:48
SWS_OP_FLAG_OPTIMIZE
@ SWS_OP_FLAG_OPTIMIZE
Definition: ops.h:372
input
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
Definition: filter_design.txt:172
SwsOpPass::planes_in
int planes_in
Definition: ops_dispatch.c:41
av_refstruct_unref
void av_refstruct_unref(void *objp)
Decrement the reference count of the underlying object and automatically free the object if there are no more references to it.
Definition: refstruct.c:120
SwsOpExec::out
uint8_t * out[4]
Definition: ops_dispatch.h:38
ff_sws_op_list_optimize
int ff_sws_op_list_optimize(SwsOpList *ops)
Fuse compatible and eliminate redundant operations, as well as replacing some operations with more efficient alternatives.
Definition: ops_optimizer.c:340
SwsPassBuffer::width_align
int width_align
Definition: graph.h:66
SwsOpPass::pixel_bits_out
int pixel_bits_out
Definition: ops_dispatch.c:44
SwsOpExec::in_offset_x
int32_t * in_offset_x
Pixel offset map; for horizontal scaling, in bytes.
Definition: ops_dispatch.h:80
SwsOpPass::planes_out
int planes_out
Definition: ops_dispatch.c:42
AV_ROUND_INF
@ AV_ROUND_INF
Round away from zero.
Definition: mathematics.h:132
av_malloc_array
#define av_malloc_array(a, b)
Definition: tableprint_vlc.h:32
SwsOpPass::tail_size_in
int tail_size_in
Definition: ops_dispatch.c:39
av_assert1
#define av_assert1(cond)
assert() equivalent for checks that are not located in speed-critical code.
Definition: avassert.h:58
DECLARE_ALIGNED_32
#define DECLARE_ALIGNED_32(t, v)
Definition: mem_internal.h:113
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
ops_internal.h
SwsOpPass
Copyright (C) 2025 Niklas Haas.
Definition: ops_dispatch.c:32
pixel_bytes
static size_t pixel_bytes(size_t pixels, int pixel_bits, enum AVRounding rounding)
Definition: ops_dispatch.c:146
SwsOp
Definition: ops.h:238
SwsOpExec::out_sub_y
uint8_t out_sub_y[4]
Definition: ops_dispatch.h:61
SwsOpExec::out_sub_x
uint8_t out_sub_x[4]
Definition: ops_dispatch.h:62
SwsOpPass::memcpy_first
bool memcpy_first
Definition: ops_dispatch.c:49
ff_sws_graph_add_pass
int ff_sws_graph_add_pass(SwsGraph *graph, enum AVPixelFormat fmt, int width, int height, SwsPass *input, int align, SwsPassFunc run, SwsPassSetup setup, void *priv, void(*free_cb)(void *priv), SwsPass **out_pass)
Allocate and add a new pass to the filter graph.
Definition: graph.c:127
ret
ret
Definition: filter_design.txt:187
SwsOpList::dst
SwsFormat dst
Definition: ops.h:293
SwsCompiledOp
Definition: ops_dispatch.h:100
SwsPassBuffer::width_pad
int width_pad
Definition: graph.h:67
SwsFormat::hw_format
enum AVPixelFormat hw_format
Definition: format.h:81
Windows::Graphics::DirectX::Direct3D11::p
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
Definition: vsrc_gfxcapture_winrt.hpp:53
SwsOpPass::num_blocks
size_t num_blocks
Definition: ops_dispatch.c:36
safe_bytes_pad
static size_t safe_bytes_pad(int linesize, int plane_pad)
Definition: ops_dispatch.c:163
SwsOpPass::exec_base
SwsOpExec exec_base
Definition: ops_dispatch.c:34
ff_sws_compile_pass
int ff_sws_compile_pass(SwsGraph *graph, const SwsOpBackend *backend, SwsOpList **pops, int flags, SwsPass *input, SwsPass **output)
Resolves an operation list to a graph pass.
Definition: ops_dispatch.c:585
SwsOpExec::in_sub_y
uint8_t in_sub_y[4]
Definition: ops_dispatch.h:61
SwsOpPass::pixel_bits_in
int pixel_bits_in
Definition: ops_dispatch.c:43
SwsOpPass::tail_off_in
int tail_off_in
Definition: ops_dispatch.c:37
SwsOpPass::memcpy_out
bool memcpy_out
Definition: ops_dispatch.c:51
mem.h
SwsGraph
Filter graph, which represents a 'baked' pixel format conversion.
Definition: graph.h:121
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes of an image.
Definition: pixdesc.h:69
align_pass
static void align_pass(SwsPass *pass, int block_size, int over_rw, int pixel_bits)
Definition: ops_dispatch.c:460
av_free
#define av_free(p)
Definition: tableprint_vlc.h:34
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
op_pass_run
static void op_pass_run(const SwsFrame *out, const SwsFrame *in, const int y, const int h, const SwsPass *pass)
Definition: ops_dispatch.c:348
int32_t
int32_t
Definition: audioconvert.c:56
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
SwsPassBuffer
Represents an output buffer for a filter pass.
Definition: graph.h:59
h
h
Definition: vp9dsp_template.c:2070
width
#define width
Definition: dsp.h:89
SwsOpList::plane_src
uint8_t plane_src[4]
Definition: ops.h:296
SwsOpList
Helper struct for representing a list of operations.
Definition: ops.h:288
SwsContext
Main external API structure.
Definition: swscale.h:206
SwsOpPass::tail_off_out
int tail_off_out
Definition: ops_dispatch.c:38
SwsFrame::linesize
int linesize[4]
Definition: format.h:213
AVPixFmtDescriptor::log2_chroma_h
uint8_t log2_chroma_h
Amount to shift the luma height right to find the chroma height.
Definition: pixdesc.h:89
src
#define src
Definition: vp8dsp.c:248
SwsOpExec::out_bump
ptrdiff_t out_bump[4]
Definition: ops_dispatch.h:52
read
static uint32_t BS_FUNC() read(BSCTX *bc, unsigned int n)
Return n bits from the buffer, n has to be in the 0-32 range.
Definition: bitstream_template.h:239
ff_sws_ops_compile
int ff_sws_ops_compile(SwsContext *ctx, const SwsOpBackend *backend, const SwsOpList *ops, SwsCompiledOp *out)
Attempt to compile a list of operations using a specific backend, or the best available backend if backend is NULL.
Definition: ops_dispatch.c:93
ff_sws_graph_rollback
void ff_sws_graph_rollback(SwsGraph *graph, int since_idx)
Remove all passes added since the given index.
Definition: graph.c:830