FFmpeg
vf_nlmeans_vulkan.c
1 /*
2  * Copyright (c) Lynne
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/mem.h"
22 #include "libavutil/random_seed.h"
23 #include "libavutil/vulkan_spirv.h"
24 #include "libavutil/opt.h"
25 #include "vulkan_filter.h"
26 
27 #include "filters.h"
28 #include "video.h"
29 
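/* Each shader invocation evaluates TYPE_ELEMS search offsets at once; the
 * integral image therefore stores one TYPE_NAME (vec4) of running
 * squared-difference sums per pixel, TYPE_SIZE bytes each. */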
30 #define TYPE_NAME "vec4"
31 #define TYPE_ELEMS 4
32 #define TYPE_SIZE (TYPE_ELEMS*4)
33 
34 typedef struct NLMeansVulkanContext {
35  FFVulkanContext vkctx;
36 
37  int initialized;
38  FFVkExecPool e;
39  AVVulkanDeviceQueueFamily *qf;
40 
41  AVBufferPool *integral_buf_pool;
42  AVBufferPool *ws_buf_pool;
43 
44  FFVkBuffer xyoffsets_buf;
45 
46  int pl_weights_rows;
47  FFVulkanShader shd_weights;
48  FFVulkanShader shd_denoise;
49 
50  int *xoffsets;
51  int *yoffsets;
52  int nb_offsets;
53  float strength[4];
54  int patch[4];
55 
56  struct nlmeans_opts {
57  int r;
58  double s;
59  double sc[4];
60  int p;
61  int pc[4];
62  int t;
63  } opts;
64 } NLMeansVulkanContext;
65 
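/* Emits GLSL that seeds the integral image: loads the source pixel (shifted
 * along the scan direction by "off") plus the four candidate offsets, and
 * stores the per-offset squared differences in s2. */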
66 static void insert_first(FFVulkanShader *shd, int r, const char *off, int horiz, int plane, int comp)
67 {
68  GLSLF(4, s1 = imageLoad(input_img[%i], pos + ivec2(%i + %s, %i + %s))[%i];
69  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
70 
71  GLSLF(4, s2[0] = imageLoad(input_img[%i], pos + offs[0] + ivec2(%i + %s, %i + %s))[%i];
72  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
73  GLSLF(4, s2[1] = imageLoad(input_img[%i], pos + offs[1] + ivec2(%i + %s, %i + %s))[%i];
74  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
75  GLSLF(4, s2[2] = imageLoad(input_img[%i], pos + offs[2] + ivec2(%i + %s, %i + %s))[%i];
76  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
77  GLSLF(4, s2[3] = imageLoad(input_img[%i], pos + offs[3] + ivec2(%i + %s, %i + %s))[%i];
78  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
79 
80  GLSLC(4, s2 = (s1 - s2) * (s1 - s2); );
81 }
82 
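/* Horizontal pass: each invocation walks nb_rows rows and builds a running
 * (prefix) sum of squared differences along x, written row by row into the
 * integral buffer. On the first pass the differences are computed in place
 * via insert_first(); otherwise the values from the previous pass are reused. */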
83 static void insert_horizontal_pass(FFVulkanShader *shd, int nb_rows, int first, int plane, int comp)
84 {
85  GLSLF(1, pos.y = int(gl_GlobalInvocationID.x) * %i; ,nb_rows);
86  if (!first)
87  GLSLC(1, barrier(); );
88  GLSLC(0, );
89  GLSLF(1, if (pos.y < height[%i]) { ,plane);
90  GLSLC(2, #pragma unroll(1) );
91  GLSLF(2, for (r = 0; r < %i; r++) { ,nb_rows);
92  GLSLC(3, prefix_sum = DTYPE(0); );
93  GLSLC(3, offset = int_stride * uint64_t(pos.y + r); );
94  GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); );
95  GLSLC(0, );
96  GLSLF(3, for (pos.x = 0; pos.x < width[%i]; pos.x++) { ,plane);
97  if (first)
98  insert_first(shd, 0, "r", 0, plane, comp);
99  else
100  GLSLC(4, s2 = dst.v[pos.x]; );
101  GLSLC(4, dst.v[pos.x] = s2 + prefix_sum; );
102  GLSLC(4, prefix_sum += s2; );
103  GLSLC(3, } );
104  GLSLC(2, } );
105  GLSLC(1, } );
106  GLSLC(0, );
107 }
108 
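/* Vertical pass: the transpose of the horizontal pass; each invocation
 * accumulates prefix sums down nb_rows columns, completing the 2D
 * summed-area table (integral image) of squared differences. */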
109 static void insert_vertical_pass(FFVulkanShader *shd, int nb_rows, int first, int plane, int comp)
110 {
111  GLSLF(1, pos.x = int(gl_GlobalInvocationID.x) * %i; ,nb_rows);
112  GLSLC(1, #pragma unroll(1) );
113  GLSLF(1, for (r = 0; r < %i; r++) ,nb_rows);
114  GLSLC(2, psum[r] = DTYPE(0); );
115  GLSLC(0, );
116  if (!first)
117  GLSLC(1, barrier(); );
118  GLSLC(0, );
119  GLSLF(1, if (pos.x < width[%i]) { ,plane);
120  GLSLF(2, for (pos.y = 0; pos.y < height[%i]; pos.y++) { ,plane);
121  GLSLC(3, offset = int_stride * uint64_t(pos.y); );
122  GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); );
123  GLSLC(0, );
124  GLSLC(3, #pragma unroll(1) );
125  GLSLF(3, for (r = 0; r < %i; r++) { ,nb_rows);
126  if (first)
127  insert_first(shd, 0, "r", 1, plane, comp);
128  else
129  GLSLC(4, s2 = dst.v[pos.x + r]; );
130  GLSLC(4, dst.v[pos.x + r] = s2 + psum[r]; );
131  GLSLC(4, psum[r] += s2; );
132  GLSLC(3, } );
133  GLSLC(2, } );
134  GLSLC(1, } );
135  GLSLC(0, );
136 }
137 
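/* Weights pass: for every pixel the patch SSD of each of the four offsets is
 * read from the integral image with the four-corner lookup (d + a - b - c),
 * converted to a weight via exp(patch_diff * strength) (strength is negative,
 * so larger SSD means smaller weight), and accumulated into the per-component
 * weights/sums buffers; atomics are used when several offset batches (t > 1)
 * run concurrently. */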
138 static void insert_weights_pass(FFVulkanShader *shd, int nb_rows, int vert,
139  int t, int dst_comp, int plane, int comp)
140 {
141  GLSLF(1, p = patch_size[%i]; ,dst_comp);
142  GLSLC(0, );
143  GLSLC(1, barrier(); );
144  GLSLC(0, );
145  if (!vert) {
146  GLSLF(1, for (pos.y = 0; pos.y < height[%i]; pos.y++) { ,plane);
147  GLSLF(2, if (gl_GlobalInvocationID.x*%i >= width[%i]) ,nb_rows, plane);
148  GLSLC(3, break; );
149  GLSLF(2, for (r = 0; r < %i; r++) { ,nb_rows);
150  GLSLF(3, pos.x = int(gl_GlobalInvocationID.x) * %i + r; ,nb_rows);
151  } else {
152  GLSLF(1, for (pos.x = 0; pos.x < width[%i]; pos.x++) { ,plane);
153  GLSLF(2, if (gl_GlobalInvocationID.x*%i >= height[%i]) ,nb_rows, plane);
154  GLSLC(3, break; );
155  GLSLF(2, for (r = 0; r < %i; r++) { ,nb_rows);
156  GLSLF(3, pos.y = int(gl_GlobalInvocationID.x) * %i + r; ,nb_rows);
157  }
158  GLSLC(0, );
159  GLSLC(3, a = DTYPE(0); );
160  GLSLC(3, b = DTYPE(0); );
161  GLSLC(3, c = DTYPE(0); );
162  GLSLC(3, d = DTYPE(0); );
163  GLSLC(0, );
164  GLSLC(3, lt = ((pos.x - p) < 0) || ((pos.y - p) < 0); );
165  GLSLC(0, );
166  GLSLF(3, src[0] = imageLoad(input_img[%i], pos + offs[0])[%i]; ,plane, comp);
167  GLSLF(3, src[1] = imageLoad(input_img[%i], pos + offs[1])[%i]; ,plane, comp);
168  GLSLF(3, src[2] = imageLoad(input_img[%i], pos + offs[2])[%i]; ,plane, comp);
169  GLSLF(3, src[3] = imageLoad(input_img[%i], pos + offs[3])[%i]; ,plane, comp);
170  GLSLC(0, );
171  GLSLC(3, if (lt == false) { );
172  GLSLC(3, offset = int_stride * uint64_t(pos.y - p); );
173  GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); );
174  GLSLC(4, a = dst.v[pos.x - p]; );
175  GLSLC(4, c = dst.v[pos.x + p]; );
176  GLSLC(3, offset = int_stride * uint64_t(pos.y + p); );
177  GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); );
178  GLSLC(4, b = dst.v[pos.x - p]; );
179  GLSLC(4, d = dst.v[pos.x + p]; );
180  GLSLC(3, } );
181  GLSLC(0, );
182  GLSLC(3, patch_diff = d + a - b - c; );
183  GLSLF(3, w = exp(patch_diff * strength[%i]); ,dst_comp);
184  GLSLC(3, w_sum = w[0] + w[1] + w[2] + w[3]; );
185  GLSLC(3, sum = dot(w, src*255); );
186  GLSLC(0, );
187  if (t > 1) {
188  GLSLF(3, atomicAdd(weights_%i[pos.y*ws_stride[%i] + pos.x], w_sum); ,dst_comp, dst_comp);
189  GLSLF(3, atomicAdd(sums_%i[pos.y*ws_stride[%i] + pos.x], sum); ,dst_comp, dst_comp);
190  } else {
191  GLSLF(3, weights_%i[pos.y*ws_stride[%i] + pos.x] += w_sum; ,dst_comp, dst_comp);
192  GLSLF(3, sums_%i[pos.y*ws_stride[%i] + pos.x] += sum; ,dst_comp, dst_comp);
193  }
194  GLSLC(2, } );
195  GLSLC(1, } );
196 }
197 
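/* Push-constant block for the weights shader; the layout must match the GLSL
 * pushConstants declaration emitted in init_weights_pipeline(). */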
198 typedef struct HorizontalPushData {
199  uint32_t width[4];
200  uint32_t height[4];
201  uint32_t ws_stride[4];
202  int32_t patch_size[4];
203  float strength[4];
204  VkDeviceAddress integral_base;
205  uint64_t integral_size;
206  uint64_t int_stride;
207  uint32_t xyoffs_start;
208 } HorizontalPushData;
209 
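/* Builds the weights shader: picks a workgroup width (and per-invocation row
 * count) large enough to cover the longer image dimension, declares the
 * integral-image buffer reference and descriptor sets, then emits the
 * horizontal, vertical and weights passes once per pixel component. */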
210 static av_cold int init_weights_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec,
211  FFVulkanShader *shd,
212  FFVkSPIRVCompiler *spv,
213  int width, int height, int t,
214  const AVPixFmtDescriptor *desc,
215  int planes, int *nb_rows)
216 {
217  int err;
218  uint8_t *spv_data;
219  size_t spv_len;
220  void *spv_opaque = NULL;
221  FFVulkanDescriptorSetBinding *desc_set;
222  int max_dim = FFMAX(width, height);
223  uint32_t max_wg = vkctx->props.properties.limits.maxComputeWorkGroupSize[0];
224  int wg_size, wg_rows;
225 
226  /* Pick a workgroup size that, together with the per-invocation row count, covers the largest image dimension */
227  wg_size = max_wg;
228  wg_rows = 1;
229 
230  if (max_wg > max_dim) {
231  wg_size = max_dim;
232  } else if (max_wg < max_dim) {
233  /* Make it fit */
234  while (wg_size*wg_rows < max_dim)
235  wg_rows++;
236  }
237 
238  RET(ff_vk_shader_init(vkctx, shd, "nlmeans_weights",
239  VK_SHADER_STAGE_COMPUTE_BIT,
240  (const char *[]) { "GL_EXT_buffer_reference",
241  "GL_EXT_buffer_reference2" }, 2,
242  wg_size, 1, 1,
243  0));
244 
245  *nb_rows = wg_rows;
246 
247  if (t > 1)
248  GLSLC(0, #extension GL_EXT_shader_atomic_float : require );
249  GLSLC(0, #extension GL_ARB_gpu_shader_int64 : require );
250  GLSLC(0, );
251  GLSLF(0, #define DTYPE %s ,TYPE_NAME);
252  GLSLF(0, #define T_ALIGN %i ,TYPE_SIZE);
253  GLSLC(0, );
254  GLSLC(0, layout(buffer_reference, buffer_reference_align = T_ALIGN) buffer DataBuffer { );
255  GLSLC(1, DTYPE v[]; );
256  GLSLC(0, }; );
257  GLSLC(0, );
258  GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
259  GLSLC(1, uvec4 width; );
260  GLSLC(1, uvec4 height; );
261  GLSLC(1, uvec4 ws_stride; );
262  GLSLC(1, ivec4 patch_size; );
263  GLSLC(1, vec4 strength; );
264  GLSLC(1, DataBuffer integral_base; );
265  GLSLC(1, uint64_t integral_size; );
266  GLSLC(1, uint64_t int_stride; );
267  GLSLC(1, uint xyoffs_start; );
268  GLSLC(0, }; );
269  GLSLC(0, );
270 
271  ff_vk_shader_add_push_const(shd, 0, sizeof(HorizontalPushData),
272  VK_SHADER_STAGE_COMPUTE_BIT);
273 
274  desc_set = (FFVulkanDescriptorSetBinding []) {
275  {
276  .name = "input_img",
277  .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
278  .mem_layout = ff_vk_shader_rep_fmt(vkctx->input_format, FF_VK_REP_FLOAT),
279  .mem_quali = "readonly",
280  .dimensions = 2,
281  .elems = planes,
282  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
283  },
284  {
285  .name = "weights_buffer_0",
286  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
287  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
288  .buf_content = "float weights_0[];",
289  },
290  {
291  .name = "sums_buffer_0",
292  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
293  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
294  .buf_content = "float sums_0[];",
295  },
296  {
297  .name = "weights_buffer_1",
298  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
299  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
300  .buf_content = "float weights_1[];",
301  },
302  {
303  .name = "sums_buffer_1",
304  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
305  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
306  .buf_content = "float sums_1[];",
307  },
308  {
309  .name = "weights_buffer_2",
310  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
311  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
312  .buf_content = "float weights_2[];",
313  },
314  {
315  .name = "sums_buffer_2",
316  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
317  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
318  .buf_content = "float sums_2[];",
319  },
320  {
321  .name = "weights_buffer_3",
322  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
323  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
324  .buf_content = "float weights_3[];",
325  },
326  {
327  .name = "sums_buffer_3",
328  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
329  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
330  .buf_content = "float sums_3[];",
331  },
332  };
333  RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 1 + 2*desc->nb_components, 0, 0));
334 
335  desc_set = (FFVulkanDescriptorSetBinding []) {
336  {
337  .name = "xyoffsets_buffer",
338  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
339  .mem_quali = "readonly",
340  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
341  .buf_content = "ivec2 xyoffsets[];",
342  },
343  };
344  RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 1, 1, 0));
345 
346  GLSLC(0, );
347  GLSLC(0, void main() );
348  GLSLC(0, { );
349  GLSLC(1, uint64_t offset; );
350  GLSLC(1, DataBuffer dst; );
351  GLSLC(1, float s1; );
352  GLSLC(1, DTYPE s2; );
353  GLSLC(1, DTYPE prefix_sum; );
354  GLSLF(1, DTYPE psum[%i]; ,*nb_rows);
355  GLSLC(1, int r; );
356  GLSLC(1, ivec2 pos; );
357  GLSLC(1, int p; );
358  GLSLC(0, );
359  GLSLC(1, DataBuffer integral_data; );
360  GLSLF(1, ivec2 offs[%i]; ,TYPE_ELEMS);
361  GLSLC(0, );
362  GLSLC(1, int invoc_idx = int(gl_WorkGroupID.z); );
363  GLSLC(0, );
364  GLSLC(1, offset = integral_size * invoc_idx; );
365  GLSLC(1, integral_data = DataBuffer(uint64_t(integral_base) + offset); );
366  for (int i = 0; i < TYPE_ELEMS; i++)
367  GLSLF(1, offs[%i] = xyoffsets[xyoffs_start + %i*invoc_idx + %i]; ,i,TYPE_ELEMS,i);
368  GLSLC(0, );
369  GLSLC(1, DTYPE a; );
370  GLSLC(1, DTYPE b; );
371  GLSLC(1, DTYPE c; );
372  GLSLC(1, DTYPE d; );
373  GLSLC(0, );
374  GLSLC(1, DTYPE patch_diff; );
375  if (TYPE_ELEMS == 4) {
376  GLSLC(1, vec4 src; );
377  GLSLC(1, vec4 w; );
378  } else {
379  GLSLC(1, vec4 src[4]; );
380  GLSLC(1, vec4 w[4]; );
381  }
382  GLSLC(1, float w_sum; );
383  GLSLC(1, float sum; );
384  GLSLC(0, );
385  GLSLC(1, bool lt; );
386  GLSLC(1, bool gt; );
387  GLSLC(0, );
388 
389  for (int i = 0; i < desc->nb_components; i++) {
390  int off = desc->comp[i].offset / (FFALIGN(desc->comp[i].depth, 8)/8);
391  if (width >= height) {
392  insert_horizontal_pass(shd, *nb_rows, 1, desc->comp[i].plane, off);
393  insert_vertical_pass(shd, *nb_rows, 0, desc->comp[i].plane, off);
394  insert_weights_pass(shd, *nb_rows, 0, t, i, desc->comp[i].plane, off);
395  } else {
396  insert_vertical_pass(shd, *nb_rows, 1, desc->comp[i].plane, off);
397  insert_horizontal_pass(shd, *nb_rows, 0, desc->comp[i].plane, off);
398  insert_weights_pass(shd, *nb_rows, 1, t, i, desc->comp[i].plane, off);
399  }
400  }
401 
402  GLSLC(0, } );
403 
404  RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", &spv_opaque));
405  RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main"));
406 
407  RET(ff_vk_shader_register_exec(vkctx, exec, shd));
408 
409 fail:
410  if (spv_opaque)
411  spv->free_shader(spv, &spv_opaque);
412 
413  return err;
414 }
415 
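/* Push constants and pipeline for the final averaging shader: one invocation
 * per output pixel divides the accumulated weighted sum (plus the original
 * pixel) by the accumulated weight and writes the result. */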
416 typedef struct DenoisePushData {
417  uint32_t ws_stride[4];
418 } DenoisePushData;
419 
420 static av_cold int init_denoise_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec,
421  FFVulkanShader *shd, FFVkSPIRVCompiler *spv,
422  const AVPixFmtDescriptor *desc, int planes)
423 {
424  int err;
425  uint8_t *spv_data;
426  size_t spv_len;
427  void *spv_opaque = NULL;
428  FFVulkanDescriptorSetBinding *desc_set;
429 
430  RET(ff_vk_shader_init(vkctx, shd, "nlmeans_denoise",
431  VK_SHADER_STAGE_COMPUTE_BIT,
432  (const char *[]) { "GL_EXT_buffer_reference",
433  "GL_EXT_buffer_reference2" }, 2,
434  32, 32, 1,
435  0));
436 
437  GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
438  GLSLC(1, uvec4 ws_stride; );
439  GLSLC(0, }; );
440 
441  ff_vk_shader_add_push_const(shd, 0, sizeof(DenoisePushData),
442  VK_SHADER_STAGE_COMPUTE_BIT);
443 
444  desc_set = (FFVulkanDescriptorSetBinding []) {
445  {
446  .name = "input_img",
447  .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
448  .mem_layout = ff_vk_shader_rep_fmt(vkctx->input_format, FF_VK_REP_FLOAT),
449  .mem_quali = "readonly",
450  .dimensions = 2,
451  .elems = planes,
452  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
453  },
454  {
455  .name = "output_img",
456  .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
457  .mem_layout = ff_vk_shader_rep_fmt(vkctx->output_format, FF_VK_REP_FLOAT),
458  .mem_quali = "writeonly",
459  .dimensions = 2,
460  .elems = planes,
461  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
462  },
463  };
464  RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 2, 0, 0));
465 
466  desc_set = (FFVulkanDescriptorSetBinding []) {
467  {
468  .name = "weights_buffer_0",
469  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
470  .mem_quali = "readonly",
471  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
472  .buf_content = "float weights_0[];",
473  },
474  {
475  .name = "sums_buffer_0",
476  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
477  .mem_quali = "readonly",
478  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
479  .buf_content = "float sums_0[];",
480  },
481  {
482  .name = "weights_buffer_1",
483  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
484  .mem_quali = "readonly",
485  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
486  .buf_content = "float weights_1[];",
487  },
488  {
489  .name = "sums_buffer_1",
490  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
491  .mem_quali = "readonly",
492  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
493  .buf_content = "float sums_1[];",
494  },
495  {
496  .name = "weights_buffer_2",
497  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
498  .mem_quali = "readonly",
499  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
500  .buf_content = "float weights_2[];",
501  },
502  {
503  .name = "sums_buffer_2",
504  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
505  .mem_quali = "readonly",
506  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
507  .buf_content = "float sums_2[];",
508  },
509  {
510  .name = "weights_buffer_3",
511  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
512  .mem_quali = "readonly",
513  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
514  .buf_content = "float weights_3[];",
515  },
516  {
517  .name = "sums_buffer_3",
518  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
519  .mem_quali = "readonly",
520  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
521  .buf_content = "float sums_3[];",
522  },
523  };
524 
525  RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 2*desc->nb_components, 0, 0));
526 
527  GLSLC(0, void main() );
528  GLSLC(0, { );
529  GLSLC(1, ivec2 size; );
530  GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
531  GLSLC(1, const uint plane = uint(gl_WorkGroupID.z); );
532  GLSLC(0, );
533  GLSLC(1, float w_sum; );
534  GLSLC(1, float sum; );
535  GLSLC(1, vec4 src; );
536  GLSLC(1, vec4 r; );
537  GLSLC(0, );
538  GLSLC(1, size = imageSize(output_img[plane]); );
539  GLSLC(1, if (!IS_WITHIN(pos, size)) );
540  GLSLC(2, return; );
541  GLSLC(0, );
542  GLSLC(1, src = imageLoad(input_img[plane], pos); );
543  GLSLC(0, );
544  for (int c = 0; c < desc->nb_components; c++) {
545  int off = desc->comp[c].offset / (FFALIGN(desc->comp[c].depth, 8)/8);
546  GLSLF(1, if (plane == %i) { ,desc->comp[c].plane);
547  GLSLF(2, w_sum = weights_%i[pos.y*ws_stride[%i] + pos.x]; ,c, c);
548  GLSLF(2, sum = sums_%i[pos.y*ws_stride[%i] + pos.x]; ,c, c);
549  GLSLF(2, r[%i] = (sum + src[%i]*255) / (1.0 + w_sum) / 255; ,off, off);
550  GLSLC(1, } );
551  GLSLC(0, );
552  }
553  GLSLC(1, imageStore(output_img[plane], pos, r); );
554  GLSLC(0, } );
555 
556  RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", &spv_opaque));
557  RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main"));
558 
559  RET(ff_vk_shader_register_exec(vkctx, exec, shd));
560 
561 fail:
562  if (spv_opaque)
563  spv->free_shader(spv, &spv_opaque);
564 
565  return err;
566 }
567 
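/* One-time filter setup: rounds the research and patch sizes up to odd values,
 * precomputes the per-component strength factors and search offsets, uploads
 * the offsets to a GPU buffer and compiles both shaders. */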
568 static av_cold int init_filter(AVFilterContext *ctx)
569 {
570  int rad, err;
571  int xcnt = 0, ycnt = 0;
572  NLMeansVulkanContext *s = ctx->priv;
573  FFVulkanContext *vkctx = &s->vkctx;
574  const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
575  FFVkSPIRVCompiler *spv = NULL;
576  int *offsets_buf;
577  int offsets_dispatched = 0, nb_dispatches = 0;
578 
579  const AVPixFmtDescriptor *desc;
580  desc = av_pix_fmt_desc_get(vkctx->output_format);
581  if (!desc)
582  return AVERROR(EINVAL);
583 
584  if (!(s->opts.r & 1)) {
585  s->opts.r |= 1;
586  av_log(ctx, AV_LOG_WARNING, "Research size should be odd, setting to %i\n",
587  s->opts.r);
588  }
589 
590  if (!(s->opts.p & 1)) {
591  s->opts.p |= 1;
592  av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i\n",
593  s->opts.p);
594  }
595 
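 /* Map the user-visible strength s to the exponent scale used by the shader:
  * with h = 10*s, strength[i] = -255^2 / h^2, so the weights pass computes
  * exp(-SSD / h^2) on 8-bit-scaled pixel values. */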
596  for (int i = 0; i < 4; i++) {
597  double str = (s->opts.sc[i] > 1.0) ? s->opts.sc[i] : s->opts.s;
598  int ps = (s->opts.pc[i] ? s->opts.pc[i] : s->opts.p);
599  str = 10.0f*str;
600  str *= -str;
601  str = 255.0*255.0 / str;
602  s->strength[i] = str;
603  if (!(ps & 1)) {
604  ps |= 1;
605  av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i\n",
606  ps);
607  }
608  s->patch[i] = ps / 2;
609  }
610 
611  rad = s->opts.r/2;
612  s->nb_offsets = (2*rad + 1)*(2*rad + 1) - 1;
613  s->xoffsets = av_malloc(s->nb_offsets*sizeof(*s->xoffsets));
614  s->yoffsets = av_malloc(s->nb_offsets*sizeof(*s->yoffsets));
615  s->nb_offsets = 0;
616 
617  for (int x = -rad; x <= rad; x++) {
618  for (int y = -rad; y <= rad; y++) {
619  if (!x && !y)
620  continue;
621 
622  s->xoffsets[xcnt++] = x;
623  s->yoffsets[ycnt++] = y;
624  s->nb_offsets++;
625  }
626  }
627 
628  RET(ff_vk_create_buf(&s->vkctx, &s->xyoffsets_buf, 2*s->nb_offsets*sizeof(int32_t), NULL, NULL,
629  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
630  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
631  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
632  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
633  RET(ff_vk_map_buffer(&s->vkctx, &s->xyoffsets_buf, (uint8_t **)&offsets_buf, 0));
634 
635  for (int i = 0; i < 2*s->nb_offsets; i += 2) {
636  offsets_buf[i + 0] = s->xoffsets[i >> 1];
637  offsets_buf[i + 1] = s->yoffsets[i >> 1];
638  }
639 
640  RET(ff_vk_unmap_buffer(&s->vkctx, &s->xyoffsets_buf, 1));
641 
642  s->opts.t = FFMIN(s->opts.t, (FFALIGN(s->nb_offsets, TYPE_ELEMS) / TYPE_ELEMS));
643  if (!vkctx->atomic_float_feats.shaderBufferFloat32AtomicAdd) {
644  av_log(ctx, AV_LOG_WARNING, "Device doesn't support atomic float adds, "
645  "disabling dispatch parallelism\n");
646  s->opts.t = 1;
647  }
648 
649  spv = ff_vk_spirv_init();
650  if (!spv) {
651  av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
652  return AVERROR_EXTERNAL;
653  }
654 
655  s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0);
656  if (!s->qf) {
657  av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n");
658  err = AVERROR(ENOTSUP);
659  goto fail;
660  }
661 
662  RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, 1, 0, 0, 0, NULL));
663 
664  RET(init_weights_pipeline(vkctx, &s->e, &s->shd_weights,
665  spv, s->vkctx.output_width, s->vkctx.output_height,
666  s->opts.t, desc, planes, &s->pl_weights_rows));
667 
668  RET(init_denoise_pipeline(vkctx, &s->e, &s->shd_denoise,
669  spv, desc, planes));
670 
671  RET(ff_vk_shader_update_desc_buffer(vkctx, &s->e.contexts[0], &s->shd_weights,
672  1, 0, 0,
673  &s->xyoffsets_buf, 0, s->xyoffsets_buf.size,
674  VK_FORMAT_UNDEFINED));
675 
676  do {
677  int wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t);
678  wg_invoc = FFMIN(wg_invoc, vkctx->props.properties.limits.maxComputeWorkGroupCount[2]);
679  offsets_dispatched += wg_invoc * TYPE_ELEMS;
680  nb_dispatches++;
681  } while (offsets_dispatched < s->nb_offsets);
682 
683  av_log(ctx, AV_LOG_VERBOSE, "Filter initialized, %i x/y offsets, %i dispatches\n",
684  s->nb_offsets, nb_dispatches);
685 
686  s->initialized = 1;
687 
688 fail:
689  if (spv)
690  spv->uninit(&spv);
691 
692  return err;
693 }
694 
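/* Records the denoise dispatch: pushes the weights/sums strides, inserts a
 * barrier so the weights accumulation is visible to the averaging shader,
 * and dispatches one thread per pixel per plane. */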
695 static int denoise_pass(NLMeansVulkanContext *s, FFVkExecContext *exec,
696  FFVkBuffer *ws_vk, uint32_t ws_stride[4])
697 {
698  FFVulkanContext *vkctx = &s->vkctx;
699  FFVulkanFunctions *vk = &vkctx->vkfn;
700  VkBufferMemoryBarrier2 buf_bar[8];
701  int nb_buf_bar = 0;
702 
703  DenoisePushData pd = {
704  { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
705  };
706 
707  /* Denoise pass pipeline */
708  ff_vk_exec_bind_shader(vkctx, exec, &s->shd_denoise);
709 
710  /* Push data */
711  ff_vk_shader_update_push_const(vkctx, exec, &s->shd_denoise,
712  VK_SHADER_STAGE_COMPUTE_BIT,
713  0, sizeof(pd), &pd);
714 
715  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
716  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
717  .srcStageMask = ws_vk->stage,
718  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
719  .srcAccessMask = ws_vk->access,
720  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT,
721  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
722  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
723  .buffer = ws_vk->buf,
724  .size = ws_vk->size,
725  .offset = 0,
726  };
727 
728  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
729  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
730  .pBufferMemoryBarriers = buf_bar,
731  .bufferMemoryBarrierCount = nb_buf_bar,
732  });
733  ws_vk->stage = buf_bar[0].dstStageMask;
734  ws_vk->access = buf_bar[0].dstAccessMask;
735 
736  /* End of denoise pass */
737  vk->CmdDispatch(exec->buf,
738  FFALIGN(vkctx->output_width, s->shd_denoise.lg_size[0])/s->shd_denoise.lg_size[0],
739  FFALIGN(vkctx->output_height, s->shd_denoise.lg_size[1])/s->shd_denoise.lg_size[1],
740  av_pix_fmt_count_planes(s->vkctx.output_format));
741 
742  return 0;
743 }
744 
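/* Per-frame entry point: allocates the pooled integral and weights/sums
 * buffers, zeroes the accumulators, runs the weights shader in batches of
 * t*TYPE_ELEMS offsets, then runs the denoise pass and submits everything in
 * a single command buffer. */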
745 static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
746 {
747  int err;
748  AVFrame *out = NULL;
749  AVFilterContext *ctx = link->dst;
750  NLMeansVulkanContext *s = ctx->priv;
751  AVFilterLink *outlink = ctx->outputs[0];
752  FFVulkanContext *vkctx = &s->vkctx;
753  FFVulkanFunctions *vk = &vkctx->vkfn;
754 
755  const AVPixFmtDescriptor *desc;
756  int plane_widths[4];
757  int plane_heights[4];
758 
759  int offsets_dispatched = 0;
760 
761  /* Integral */
762  AVBufferRef *integral_buf = NULL;
763  FFVkBuffer *integral_vk;
764  size_t int_stride;
765  size_t int_size;
766 
767  /* Weights/sums */
768  AVBufferRef *ws_buf = NULL;
769  FFVkBuffer *ws_vk;
770  VkDeviceSize weights_offs[4];
771  VkDeviceSize sums_offs[4];
772  uint32_t ws_stride[4];
773  size_t ws_size[4];
774  size_t ws_total_size = 0;
775 
776  FFVkExecContext *exec;
777  VkImageView in_views[AV_NUM_DATA_POINTERS];
778  VkImageView out_views[AV_NUM_DATA_POINTERS];
779  VkImageMemoryBarrier2 img_bar[8];
780  int nb_img_bar = 0;
781  VkBufferMemoryBarrier2 buf_bar[8];
782  int nb_buf_bar = 0;
783 
784  if (!s->initialized)
785  RET(init_filter(ctx));
786 
787  desc = av_pix_fmt_desc_get(vkctx->output_format);
788  if (!desc)
789  return AVERROR(EINVAL);
790 
791  /* Integral image */
792  int_stride = s->shd_weights.lg_size[0]*s->pl_weights_rows*TYPE_SIZE;
793  int_size = s->shd_weights.lg_size[0]*s->pl_weights_rows*int_stride;
794 
795  /* Plane dimensions */
796  for (int i = 0; i < desc->nb_components; i++) {
797  plane_widths[i] = !i || (i == 3) ? vkctx->output_width : AV_CEIL_RSHIFT(vkctx->output_width, desc->log2_chroma_w);
798  plane_heights[i] = !i || (i == 3) ? vkctx->output_height : AV_CEIL_RSHIFT(vkctx->output_height, desc->log2_chroma_h);
799  plane_widths[i] = FFALIGN(plane_widths[i], s->shd_denoise.lg_size[0]);
800  plane_heights[i] = FFALIGN(plane_heights[i], s->shd_denoise.lg_size[1]);
801 
802  ws_stride[i] = plane_widths[i];
803  ws_size[i] = ws_stride[i] * plane_heights[i] * sizeof(float);
804  ws_total_size += ws_size[i];
805  }
806 
807  /* Buffers */
808  err = ff_vk_get_pooled_buffer(&s->vkctx, &s->integral_buf_pool, &integral_buf,
809  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
810  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
811  NULL,
812  s->opts.t * int_size,
813  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
814  if (err < 0)
815  return err;
816  integral_vk = (FFVkBuffer *)integral_buf->data;
817 
818  err = ff_vk_get_pooled_buffer(&s->vkctx, &s->ws_buf_pool, &ws_buf,
819  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
820  VK_BUFFER_USAGE_TRANSFER_DST_BIT |
821  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
822  NULL,
823  ws_total_size * 2,
824  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
825  if (err < 0)
826  return err;
827  ws_vk = (FFVkBuffer *)ws_buf->data;
828 
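 /* Pooled buffer layout: all weight planes are packed in the first half,
  * all sum planes in the second half (hence the 2x allocation above). */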
829  weights_offs[0] = 0;
830  sums_offs[0] = ws_total_size;
831  for (int i = 1; i < desc->nb_components; i++) {
832  weights_offs[i] = weights_offs[i - 1] + ws_size[i - 1];
833  sums_offs[i] = sums_offs[i - 1] + ws_size[i - 1];
834  }
835 
836  /* Output frame */
837  out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
838  if (!out) {
839  err = AVERROR(ENOMEM);
840  goto fail;
841  }
842 
843  /* Execution context */
844  exec = ff_vk_exec_get(&s->vkctx, &s->e);
845  ff_vk_exec_start(vkctx, exec);
846 
847  /* Dependencies */
848  RET(ff_vk_exec_add_dep_frame(vkctx, exec, in,
849  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
850  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
851  RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
852  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
853  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
854 
855  RET(ff_vk_exec_add_dep_buf(vkctx, exec, &integral_buf, 1, 0));
856  integral_buf = NULL;
857 
858  RET(ff_vk_exec_add_dep_buf(vkctx, exec, &ws_buf, 1, 0));
859  ws_buf = NULL;
860 
861  /* Input frame prep */
862  RET(ff_vk_create_imageviews(vkctx, exec, in_views, in, FF_VK_REP_FLOAT));
863  ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar,
864  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
865  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
866  VK_ACCESS_SHADER_READ_BIT,
867  VK_IMAGE_LAYOUT_GENERAL,
868  VK_QUEUE_FAMILY_IGNORED);
869 
870  /* Output frame prep */
871  RET(ff_vk_create_imageviews(vkctx, exec, out_views, out, FF_VK_REP_FLOAT));
872  ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar,
873  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
874  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
875  VK_ACCESS_SHADER_WRITE_BIT,
876  VK_IMAGE_LAYOUT_GENERAL,
877  VK_QUEUE_FAMILY_IGNORED);
878 
879  nb_buf_bar = 0;
880  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
881  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
882  .srcStageMask = ws_vk->stage,
883  .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
884  .srcAccessMask = ws_vk->access,
885  .dstAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
886  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
887  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
888  .buffer = ws_vk->buf,
889  .size = ws_vk->size,
890  .offset = 0,
891  };
892  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
893  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
894  .srcStageMask = integral_vk->stage,
895  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
896  .srcAccessMask = integral_vk->access,
897  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
898  VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
899  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
900  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
901  .buffer = integral_vk->buf,
902  .size = integral_vk->size,
903  .offset = 0,
904  };
905 
906  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
907  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
908  .pImageMemoryBarriers = img_bar,
909  .imageMemoryBarrierCount = nb_img_bar,
910  .pBufferMemoryBarriers = buf_bar,
911  .bufferMemoryBarrierCount = nb_buf_bar,
912  });
913  ws_vk->stage = buf_bar[0].dstStageMask;
914  ws_vk->access = buf_bar[0].dstAccessMask;
915  integral_vk->stage = buf_bar[1].dstStageMask;
916  integral_vk->access = buf_bar[1].dstAccessMask;
917 
918  /* Buffer zeroing */
919  vk->CmdFillBuffer(exec->buf, ws_vk->buf, 0, ws_vk->size, 0x0);
920 
921  nb_buf_bar = 0;
922  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
923  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
924  .srcStageMask = ws_vk->stage,
925  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
926  .srcAccessMask = ws_vk->access,
927  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
928  VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
929  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
930  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
931  .buffer = ws_vk->buf,
932  .size = ws_vk->size,
933  .offset = 0,
934  };
935 
936  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
937  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
938  .pBufferMemoryBarriers = buf_bar,
939  .bufferMemoryBarrierCount = nb_buf_bar,
940  });
941  ws_vk->stage = buf_bar[0].dstStageMask;
942  ws_vk->access = buf_bar[0].dstAccessMask;
943 
944  /* Update weights descriptors */
945  ff_vk_shader_update_img_array(vkctx, exec, &s->shd_weights, in, in_views, 0, 0,
946  VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE);
947  for (int i = 0; i < desc->nb_components; i++) {
948  RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_weights, 0, 1 + i*2 + 0, 0,
949  ws_vk, weights_offs[i], ws_size[i],
950  VK_FORMAT_UNDEFINED));
951  RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_weights, 0, 1 + i*2 + 1, 0,
952  ws_vk, sums_offs[i], ws_size[i],
953  VK_FORMAT_UNDEFINED));
954  }
955 
956  /* Update denoise descriptors */
957  ff_vk_shader_update_img_array(vkctx, exec, &s->shd_denoise, in, in_views, 0, 0,
958  VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE);
959  ff_vk_shader_update_img_array(vkctx, exec, &s->shd_denoise, out, out_views, 0, 1,
960  VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE);
961  for (int i = 0; i < desc->nb_components; i++) {
962  RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_denoise, 1, i*2 + 0, 0,
963  ws_vk, weights_offs[i], ws_size[i],
964  VK_FORMAT_UNDEFINED));
965  RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_denoise, 1, i*2 + 1, 0,
966  ws_vk, sums_offs[i], ws_size[i],
967  VK_FORMAT_UNDEFINED));
968  }
969 
970  /* Weights pipeline */
971  ff_vk_exec_bind_shader(vkctx, exec, &s->shd_weights);
972 
973  do {
974  int wg_invoc;
975  HorizontalPushData pd = {
976  { plane_widths[0], plane_widths[1], plane_widths[2], plane_widths[3] },
977  { plane_heights[0], plane_heights[1], plane_heights[2], plane_heights[3] },
978  { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
979  { s->patch[0], s->patch[1], s->patch[2], s->patch[3] },
980  { s->strength[0], s->strength[1], s->strength[2], s->strength[3], },
981  integral_vk->address,
982  (uint64_t)int_size,
983  (uint64_t)int_stride,
984  offsets_dispatched,
985  };
986 
987  /* Push data */
988  ff_vk_shader_update_push_const(vkctx, exec, &s->shd_weights,
989  VK_SHADER_STAGE_COMPUTE_BIT,
990  0, sizeof(pd), &pd);
991 
992  if (offsets_dispatched) {
993  nb_buf_bar = 0;
994  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
995  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
996  .srcStageMask = integral_vk->stage,
997  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
998  .srcAccessMask = integral_vk->access,
999  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
1000  VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
1001  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
1002  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
1003  .buffer = integral_vk->buf,
1004  .size = integral_vk->size,
1005  .offset = 0,
1006  };
1007 
1008  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
1009  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
1010  .pBufferMemoryBarriers = buf_bar,
1011  .bufferMemoryBarrierCount = nb_buf_bar,
1012  });
1013  integral_vk->stage = buf_bar[1].dstStageMask;
1014  integral_vk->access = buf_bar[1].dstAccessMask;
1015  }
1016 
1017  wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t);
1018  wg_invoc = FFMIN(wg_invoc, vkctx->props.properties.limits.maxComputeWorkGroupCount[2]);
1019 
1020  /* End of horizontal pass */
1021  vk->CmdDispatch(exec->buf, 1, 1, wg_invoc);
1022 
1023  offsets_dispatched += wg_invoc * TYPE_ELEMS;
1024  } while (offsets_dispatched < s->nb_offsets);
1025 
1026  RET(denoise_pass(s, exec, ws_vk, ws_stride));
1027 
1028  err = ff_vk_exec_submit(vkctx, exec);
1029  if (err < 0)
1030  return err;
1031 
1032  err = av_frame_copy_props(out, in);
1033  if (err < 0)
1034  goto fail;
1035 
1036  av_frame_free(&in);
1037 
1038  return ff_filter_frame(outlink, out);
1039 
1040 fail:
1041  av_buffer_unref(&integral_buf);
1042  av_buffer_unref(&ws_buf);
1043  av_frame_free(&in);
1044  av_frame_free(&out);
1045  return err;
1046 }
1047 
1048 static void nlmeans_vulkan_uninit(AVFilterContext *avctx)
1049 {
1050  NLMeansVulkanContext *s = avctx->priv;
1051  FFVulkanContext *vkctx = &s->vkctx;
1052 
1053  ff_vk_exec_pool_free(vkctx, &s->e);
1054  ff_vk_shader_free(vkctx, &s->shd_weights);
1055  ff_vk_shader_free(vkctx, &s->shd_denoise);
1056 
1057  av_buffer_pool_uninit(&s->integral_buf_pool);
1058  av_buffer_pool_uninit(&s->ws_buf_pool);
1059 
1060  ff_vk_uninit(&s->vkctx);
1061 
1062  av_freep(&s->xoffsets);
1063  av_freep(&s->yoffsets);
1064 
1065  s->initialized = 0;
1066 }
1067 
1068 #define OFFSET(x) offsetof(NLMeansVulkanContext, x)
1069 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
1070 static const AVOption nlmeans_vulkan_options[] = {
1071  { "s", "denoising strength for all components", OFFSET(opts.s), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
1072  { "p", "patch size for all components", OFFSET(opts.p), AV_OPT_TYPE_INT, { .i64 = 3*2+1 }, 0, 99, FLAGS },
1073  { "r", "research window radius", OFFSET(opts.r), AV_OPT_TYPE_INT, { .i64 = 7*2+1 }, 0, 99, FLAGS },
1074  { "t", "parallelism", OFFSET(opts.t), AV_OPT_TYPE_INT, { .i64 = 36 }, 1, 168, FLAGS },
1075 
1076  { "s1", "denoising strength for component 1", OFFSET(opts.sc[0]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
1077  { "s2", "denoising strength for component 2", OFFSET(opts.sc[1]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
1078  { "s3", "denoising strength for component 3", OFFSET(opts.sc[2]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
1079  { "s4", "denoising strength for component 4", OFFSET(opts.sc[3]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
1080 
1081  { "p1", "patch size for component 1", OFFSET(opts.pc[0]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
1082  { "p2", "patch size for component 2", OFFSET(opts.pc[1]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
1083  { "p3", "patch size for component 3", OFFSET(opts.pc[2]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
1084  { "p4", "patch size for component 4", OFFSET(opts.pc[3]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
1085 
1086  { NULL }
1087 };
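/*
 * Example usage (illustrative; the exact upload/download chain depends on the
 * surrounding filter graph and pixel format):
 *
 *   ffmpeg -init_hw_device vulkan -i input.mp4 \
 *          -vf "hwupload,nlmeans_vulkan=s=2.0:p=7:r=15,hwdownload,format=yuv420p" \
 *          output.mp4
 */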
1088 
1089 AVFILTER_DEFINE_CLASS(nlmeans_vulkan);
1090 
1091 static const AVFilterPad nlmeans_vulkan_inputs[] = {
1092  {
1093  .name = "default",
1094  .type = AVMEDIA_TYPE_VIDEO,
1095  .filter_frame = &nlmeans_vulkan_filter_frame,
1096  .config_props = &ff_vk_filter_config_input,
1097  },
1098 };
1099 
1100 static const AVFilterPad nlmeans_vulkan_outputs[] = {
1101  {
1102  .name = "default",
1103  .type = AVMEDIA_TYPE_VIDEO,
1104  .config_props = &ff_vk_filter_config_output,
1105  },
1106 };
1107 
1108 const FFFilter ff_vf_nlmeans_vulkan = {
1109  .p.name = "nlmeans_vulkan",
1110  .p.description = NULL_IF_CONFIG_SMALL("Non-local means denoiser (Vulkan)"),
1111  .p.priv_class = &nlmeans_vulkan_class,
1112  .p.flags = AVFILTER_FLAG_HWDEVICE,
1113  .priv_size = sizeof(NLMeansVulkanContext),
1114  .init = &ff_vk_filter_init,
1115  .uninit = &nlmeans_vulkan_uninit,
1116  FILTER_INPUTS(nlmeans_vulkan_inputs),
1117  FILTER_OUTPUTS(nlmeans_vulkan_outputs),
1118  FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN),
1119  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
1120 };