FFmpeg
opencl.h
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #ifndef AVFILTER_OPENCL_H
20 #define AVFILTER_OPENCL_H
21 
22 // The intended target is OpenCL 1.2, so disable warnings for APIs
23 // deprecated after that. This primarily applies to clCreateCommandQueue(),
24 // we can't use the replacement clCreateCommandQueueWithProperties() because
25 // it was introduced in OpenCL 2.0.
26 #define CL_USE_DEPRECATED_OPENCL_1_2_APIS
27 
28 #include "libavutil/bprint.h"
29 #include "libavutil/buffer.h"
30 #include "libavutil/hwcontext.h"
32 #include "libavutil/pixfmt.h"
33 
34 #include "avfilter.h"
35 
36 typedef struct OpenCLFilterContext {
37  const AVClass *class;
38 
42 
43  cl_program program;
44 
49 
50 // Groups together information about a kernel argument
51 typedef struct OpenCLKernelArg {
52  size_t arg_size;
53  const void *arg_val;
55 
56 /**
57  * set argument to specific Kernel.
58  * This macro relies on usage of local label "fail" and variables:
59  * avctx, cle and err.
60  */
61 #define CL_SET_KERNEL_ARG(kernel, arg_num, type, arg) \
62  cle = clSetKernelArg(kernel, arg_num, sizeof(type), arg); \
63  if (cle != CL_SUCCESS) { \
64  av_log(avctx, AV_LOG_ERROR, "Failed to set kernel " \
65  "argument %d: error %d.\n", arg_num, cle); \
66  err = AVERROR(EIO); \
67  goto fail; \
68  }
69 
70 /**
71  * A helper macro to handle OpenCL errors. It will assign errcode to
72  * variable err, log error msg, and jump to fail label on error.
73  */
74 #define CL_FAIL_ON_ERROR(errcode, ...) do { \
75  if (cle != CL_SUCCESS) { \
76  av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \
77  err = errcode; \
78  goto fail; \
79  } \
80  } while(0)
81 
82 /**
83  * Create a kernel with the given name.
84  *
85  * The kernel variable in the context structure must have a name of the form
86  * kernel_<kernel_name>.
87  *
88  * The OpenCLFilterContext variable in the context structure must be named ocf.
89  *
90  * Requires the presence of a local cl_int variable named cle and a fail label for error
91  * handling.
92  */
93 #define CL_CREATE_KERNEL(ctx, kernel_name) do { \
94  ctx->kernel_ ## kernel_name = clCreateKernel(ctx->ocf.program, #kernel_name, &cle); \
95  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create %s kernel: %d.\n", #kernel_name, cle); \
96 } while(0)
97 
98 /**
99  * release an OpenCL Kernel
100  */
101 #define CL_RELEASE_KERNEL(k) \
102 do { \
103  if (k) { \
104  cle = clReleaseKernel(k); \
105  if (cle != CL_SUCCESS) \
106  av_log(avctx, AV_LOG_ERROR, "Failed to release " \
107  "OpenCL kernel: %d.\n", cle); \
108  } \
109 } while(0)
110 
111 /**
112  * release an OpenCL Memory Object
113  */
114 #define CL_RELEASE_MEMORY(m) \
115 do { \
116  if (m) { \
117  cle = clReleaseMemObject(m); \
118  if (cle != CL_SUCCESS) \
119  av_log(avctx, AV_LOG_ERROR, "Failed to release " \
120  "OpenCL memory: %d.\n", cle); \
121  } \
122 } while(0)
123 
124 /**
125  * release an OpenCL Command Queue
126  */
127 #define CL_RELEASE_QUEUE(q) \
128 do { \
129  if (q) { \
130  cle = clReleaseCommandQueue(q); \
131  if (cle != CL_SUCCESS) \
132  av_log(avctx, AV_LOG_ERROR, "Failed to release " \
133  "OpenCL command queue: %d.\n", cle); \
134  } \
135 } while(0)
136 
137 /**
138  * Enqueue a kernel with the given information.
139  *
140  * Kernel arguments are provided as KernelArg structures and are set in the order
141  * that they are passed.
142  *
143  * Requires the presence of a local cl_int variable named cle and a fail label for error
144  * handling.
145  */
146 #define CL_ENQUEUE_KERNEL_WITH_ARGS(queue, kernel, global_work_size, local_work_size, event, ...) \
147 do { \
148  OpenCLKernelArg args[] = {__VA_ARGS__}; \
149  for (int i = 0; i < FF_ARRAY_ELEMS(args); i++) { \
150  cle = clSetKernelArg(kernel, i, args[i].arg_size, args[i].arg_val); \
151  if (cle != CL_SUCCESS) { \
152  av_log(avctx, AV_LOG_ERROR, "Failed to set kernel " \
153  "argument %d: error %d.\n", i, cle); \
154  err = AVERROR(EIO); \
155  goto fail; \
156  } \
157  } \
158  \
159  cle = clEnqueueNDRangeKernel( \
160  queue, \
161  kernel, \
162  FF_ARRAY_ELEMS(global_work_size), \
163  NULL, \
164  global_work_size, \
165  local_work_size, \
166  0, \
167  NULL, \
168  event \
169  ); \
170  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue kernel: %d.\n", cle); \
171 } while (0)
172 
173 /**
174  * Uses the above macro to enqueue the given kernel and then additionally runs it to
175  * completion via clFinish.
176  *
177  * Requires the presence of a local cl_int variable named cle and a fail label for error
178  * handling.
179  */
180 #define CL_RUN_KERNEL_WITH_ARGS(queue, kernel, global_work_size, local_work_size, event, ...) do { \
181  CL_ENQUEUE_KERNEL_WITH_ARGS( \
182  queue, kernel, global_work_size, local_work_size, event, __VA_ARGS__ \
183  ); \
184  \
185  cle = clFinish(queue); \
186  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", cle); \
187 } while (0)
188 
189 /**
190  * Create a buffer with the given information.
191  *
192  * The buffer variable in the context structure must be named <buffer_name>.
193  *
194  * Requires the presence of a local cl_int variable named cle and a fail label for error
195  * handling.
196  */
197 #define CL_CREATE_BUFFER_FLAGS(ctx, buffer_name, flags, size, host_ptr) do { \
198  ctx->buffer_name = clCreateBuffer( \
199  ctx->ocf.hwctx->context, \
200  flags, \
201  size, \
202  host_ptr, \
203  &cle \
204  ); \
205  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create buffer %s: %d.\n", #buffer_name, cle); \
206 } while(0)
207 
208 /**
209  * Perform a blocking write to a buffer.
210  *
211  * Requires the presence of a local cl_int variable named cle and a fail label for error
212  * handling.
213  */
214 #define CL_BLOCKING_WRITE_BUFFER(queue, buffer, size, host_ptr, event) do { \
215  cle = clEnqueueWriteBuffer( \
216  queue, \
217  buffer, \
218  CL_TRUE, \
219  0, \
220  size, \
221  host_ptr, \
222  0, \
223  NULL, \
224  event \
225  ); \
226  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to write buffer to device: %d.\n", cle); \
227 } while(0)
228 
229 /**
230  * Create a buffer with the given information.
231  *
232  * The buffer variable in the context structure must be named <buffer_name>.
233  *
234  * Requires the presence of a local cl_int variable named cle and a fail label for error
235  * handling.
236  */
237 #define CL_CREATE_BUFFER(ctx, buffer_name, size) CL_CREATE_BUFFER_FLAGS(ctx, buffer_name, 0, size, NULL)
238 
239 /**
240  * Return that all inputs and outputs support only AV_PIX_FMT_OPENCL.
241  */
243 
244 /**
245  * Check that the input link contains a suitable hardware frames
246  * context and extract the device from it.
247  */
249 
250 /**
251  * Create a suitable hardware frames context for the output.
252  */
254 
255 /**
256  * Initialise an OpenCL filter context.
257  */
259 
260 /**
261  * Uninitialise an OpenCL filter context.
262  */
264 
265 /**
266  * Load a new OpenCL program from strings in memory.
267  *
268  * Creates a new program and compiles it for the current device.
269  * Will log any build errors if compilation fails.
270  */
272  const char **program_source_array,
273  int nb_strings);
274 
275 /**
276  * Load a new OpenCL program from a file.
277  *
278  * Same as ff_opencl_filter_load_program(), but from a file.
279  */
281  const char *filename);
282 
283 /**
284  * Find the work size needed needed for a given plane of an image.
285  */
287  size_t *work_size,
288  AVFrame *frame, int plane,
289  int block_alignment);
290 /**
291  * Print a 3x3 matrix into a buffer as __constant array, which could
292  * be included in an OpenCL program.
293 */
294 
295 void ff_opencl_print_const_matrix_3x3(AVBPrint *buf, const char *name_str,
296  double mat[3][3]);
297 
298 /**
299  * Gets the command start and end times for the given event and returns the
300  * difference (the time that the event took).
301  */
302 cl_ulong ff_opencl_get_event_time(cl_event event);
303 
304 #endif /* AVFILTER_OPENCL_H */
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
hwcontext_opencl.h
OpenCLFilterContext::output_format
enum AVPixelFormat output_format
Definition: opencl.h:45
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:300
ff_opencl_filter_init
int ff_opencl_filter_init(AVFilterContext *avctx)
Initialise an OpenCL filter context.
Definition: opencl.c:147
OpenCLKernelArg
Definition: opencl.h:51
AVOpenCLDeviceContext
OpenCL device details.
Definition: hwcontext_opencl.h:63
OpenCLFilterContext::output_width
int output_width
Definition: opencl.h:46
OpenCLFilterContext::device
AVHWDeviceContext * device
Definition: opencl.h:40
AVHWDeviceContext
This struct aggregates all the (hardware/vendor-specific) "high-level" state, i.e.
Definition: hwcontext.h:61
ff_opencl_filter_work_size_from_image
int ff_opencl_filter_work_size_from_image(AVFilterContext *avctx, size_t *work_size, AVFrame *frame, int plane, int block_alignment)
Find the work size needed needed for a given plane of an image.
Definition: opencl.c:278
OpenCLFilterContext::program
cl_program program
Definition: opencl.h:43
ff_opencl_filter_config_output
int ff_opencl_filter_config_output(AVFilterLink *outlink)
Create a suitable hardware frames context for the output.
Definition: opencl.c:96
ff_opencl_filter_uninit
void ff_opencl_filter_uninit(AVFilterContext *avctx)
Uninitialise an OpenCL filter context.
Definition: opencl.c:156
AVClass
Describe the class of an AVClass context structure.
Definition: log.h:67
ff_opencl_filter_config_input
int ff_opencl_filter_config_input(AVFilterLink *inlink)
Check that the input link contains a suitable hardware frames context and extract the device from it.
Definition: opencl.c:60
OpenCLKernelArg::arg_size
size_t arg_size
Definition: opencl.h:52
ff_opencl_filter_load_program
int ff_opencl_filter_load_program(AVFilterContext *avctx, const char **program_source_array, int nb_strings)
Load a new OpenCL program from strings in memory.
Definition: opencl.c:171
buffer.h
bprint.h
ff_opencl_filter_load_program_from_file
int ff_opencl_filter_load_program_from_file(AVFilterContext *avctx, const char *filename)
Load a new OpenCL program from a file.
Definition: opencl.c:219
ff_opencl_get_event_time
cl_ulong ff_opencl_get_event_time(cl_event event)
Gets the command start and end times for the given event and returns the difference (the time that th...
Definition: opencl.c:354
ff_opencl_filter_query_formats
int ff_opencl_filter_query_formats(AVFilterContext *avctx)
Return that all inputs and outputs support only AV_PIX_FMT_OPENCL.
Definition: opencl.c:28
pixfmt.h
frame
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a frame
Definition: filter_design.txt:264
OpenCLKernelArg::arg_val
const void * arg_val
Definition: opencl.h:53
OpenCLFilterContext::output_height
int output_height
Definition: opencl.h:47
OpenCLFilterContext::device_ref
AVBufferRef * device_ref
Definition: opencl.h:39
avfilter.h
OpenCLFilterContext
Definition: opencl.h:36
AVFilterContext
An instance of a filter.
Definition: avfilter.h:338
ff_opencl_print_const_matrix_3x3
void ff_opencl_print_const_matrix_3x3(AVBPrint *buf, const char *name_str, double mat[3][3])
Print a 3x3 matrix into a buffer as __constant array, which could be included in an OpenCL program.
Definition: opencl.c:341
OpenCLFilterContext::hwctx
AVOpenCLDeviceContext * hwctx
Definition: opencl.h:41
AVBufferRef
A reference to a data buffer.
Definition: buffer.h:81
hwcontext.h