Go to the documentation of this file.
58 #define OFFSET(x) offsetof(DnnProcessingContext, x)
59 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
63 #if (CONFIG_LIBTENSORFLOW == 1)
78 if (!
ctx->model_filename) {
82 if (!
ctx->model_inputname) {
86 if (!
ctx->model_outputname) {
92 if (!
ctx->dnn_module) {
96 if (!
ctx->dnn_module->load_model) {
101 ctx->model = (
ctx->dnn_module->load_model)(
ctx->model_filename);
123 #define LOG_FORMAT_CHANNEL_MISMATCH() \
124 av_log(ctx, AV_LOG_ERROR, \
125 "the frame's format %s does not match " \
126 "the model input channel %d\n", \
127 av_get_pix_fmt_name(fmt), \
128 model_input->channels);
200 result =
ctx->model->get_input(
ctx->model->model, &model_input,
ctx->model_inputname);
214 ctx->input.dt = model_input.
dt;
216 result = (
ctx->model->set_input_output)(
ctx->model->model,
217 &
ctx->input,
ctx->model_inputname,
218 (
const char **)&
ctx->model_outputname, 1);
289 ctx->sws_uv_height = sws_src_h;
307 result = (
ctx->dnn_module->execute_model)(
ctx->model, &
ctx->output, 1);
313 outlink->
w =
ctx->output.width;
314 outlink->
h =
ctx->output.height;
326 switch (
frame->format) {
332 (
const int [4]){frame->width * 3 * sizeof(float), 0, 0, 0});
337 bytewidth,
frame->height);
344 bytewidth,
frame->height);
353 (
const int [4]){frame->width * sizeof(float), 0, 0, 0});
367 switch (
frame->format) {
371 sws_scale(
ctx->sws_grayf32_to_gray8, (
const uint8_t *[4]){(const uint8_t *)dnn_output->data, 0, 0, 0},
372 (
const int[4]){frame->width * 3 * sizeof(float), 0, 0, 0},
378 dnn_output->
data, bytewidth,
379 bytewidth,
frame->height);
387 dnn_output->
data, bytewidth,
388 bytewidth,
frame->height);
393 dnn_output->
data, bytewidth,
394 bytewidth,
frame->height);
401 sws_scale(
ctx->sws_grayf32_to_gray8, (
const uint8_t *[4]){(const uint8_t *)dnn_output->data, 0, 0, 0},
402 (
const int[4]){frame->width * sizeof(float), 0, 0, 0},
424 if (!
ctx->sws_uv_scale) {
428 for (
int i = 1;
i < 3; ++
i) {
431 in->data[
i],
in->linesize[
i],
432 bytewidth, uv_height);
436 0,
ctx->sws_uv_height,
out->data + 1,
out->linesize + 1);
438 0,
ctx->sws_uv_height,
out->data + 2,
out->linesize + 2);
454 dnn_result = (
ctx->dnn_module->execute_model)(
ctx->model, &
ctx->output, 1);
511 .
name =
"dnn_processing",
519 .priv_class = &dnn_processing_class,
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.
static av_cold void uninit(AVFilterContext *ctx)
static const AVOption dnn_processing_options[]
AVPixelFormat
Pixel format.
Filter design: in this document, the word “frame” indicates either a video frame or a group of audio samples, as stored in an AVFrame structure. Format negotiation: the query_formats method sets, for each input and each output, the list of supported formats. For video that means pixel format. For audio that means channel layout, sample format and sample rate. The lists are not just lists; they are references to shared objects. When the negotiation mechanism computes the intersection of the formats supported at each end of a link, all references to both lists are replaced with a reference to the intersection. And when a single format is eventually chosen for a link amongst the remaining list, all references to the list are updated. That means that if a filter requires that its input and output have the same format amongst a supported list, all it has to do is use a reference to the same list of formats. query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Next section: Frame references ownership and permissions.
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
The exact code depends on how similar the "process a frame" blocks are and how related they are to the "status change" block, and needs to apply these operations to the correct inlink or outlink if there are several. Macros are available to factor that when no extra processing is needed, e.g. FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink).
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g. extended_data.
This structure describes decoded (raw) audio or video data.
@ AV_PIX_FMT_BGR24
packed RGB 8:8:8, 24bpp, BGRBGR...
int attribute_align_arg sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[], const int srcStride[], int srcSliceY, int srcSliceH, uint8_t *const dst[], const int dstStride[])
swscale wrapper, so we don't need to export the SwsContext.
static int config_input(AVFilterLink *inlink)
const char * name
Filter name.
A link between two filters.
void av_image_copy_plane(uint8_t *dst, int dst_linesize, const uint8_t *src, int src_linesize, int bytewidth, int height)
Copy image plane from src to dst.
struct SwsContext * sws_grayf32_to_gray8
AVFilter ff_vf_dnn_processing
A filter pad used for either input or output.
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
#define check(x, y, S, v)
static int copy_uv_planes(DnnProcessingContext *ctx, AVFrame *out, const AVFrame *in)
#define AV_CEIL_RSHIFT(a, b)
#define av_assert0(cond)
assert() equivalent, that is always enabled.
static const AVFilterPad outputs[]
static enum AVPixelFormat pix_fmts[]
static enum AVPixelFormat pix_fmt
static int copy_from_frame_to_dnn(DnnProcessingContext *ctx, const AVFrame *frame)
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
#define AV_PIX_FMT_GRAYF32
static int check_modelinput_inlink(const DNNData *model_input, const AVFilterLink *inlink)
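it's the only field you need to keep, assuming you have a context. There is some magic you don't need to care about around this field; just let it be. The options array defines the user-accessible options (those passed with -vf); most options follow the pattern: name, description, offset, type, default value, minimum value, maximum value, flags. "name" is the option name; keep it simple and lowercase. "description" strings are short, in lowercase, without a period, and describe what they do, for example "set the foo of the bar". "offset" is the offset of the field in your local context.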
AVFILTER_DEFINE_CLASS(dnn_processing)
Describe the class of an AVClass context structure.
and forward the result (frame or status change) to the corresponding input. If nothing is possible
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
static av_cold int init(AVFilterContext *context)
static const AVFilterPad dnn_processing_inputs[]
these buffered frames must be flushed immediately if a new input produces new output. If the input frame is not enough to produce output, the filter must not call request_frame to get more. It must just process the frame or queue it. The task of requesting more frames is left to the filter's request_frame method or the application. If a filter has several inputs, ...
static int prepare_sws_context(AVFilterLink *outlink)
struct SwsContext * sws_gray8_to_grayf32
@ AV_PIX_FMT_GRAY8
Y, 8bpp.
static int query_formats(AVFilterContext *context)
@ AV_PIX_FMT_RGB24
packed RGB 8:8:8, 24bpp, RGBRGB...
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
#define AV_PIX_FMT_FLAG_RGB
The pixel format contains RGB-like data (as opposed to YUV/grayscale).
struct SwsContext * sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat, int dstW, int dstH, enum AVPixelFormat dstFormat, int flags, SwsFilter *srcFilter, SwsFilter *dstFilter, const double *param)
Allocate and return an SwsContext.
AVFilterContext * src
source filter
int av_image_get_linesize(enum AVPixelFormat pix_fmt, int width, int plane)
Compute the size of an image line with format pix_fmt and width width for the plane plane.
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! 
*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
DNNBackendType backend_type
#define i(width, name, range_min, range_max)
int w
agreed upon image width
const char * name
Pad name.
DNNModule * ff_get_dnn_module(DNNBackendType backend_type)
these buffered frames must be flushed immediately if a new input produces new output. If the input frame is not enough to produce output, the filter must not call request_frame to get more. It must just process the frame or queue it. The task of requesting more frames is left to the filter's request_frame method or the application. If a filter has several inputs, the filter must be ready for frames arriving randomly on any input. Therefore, any filter with several inputs will most likely require some kind of queuing mechanism. It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced. request_frame: for filters that do not use the activate() callback, this method is called when a frame is wanted on an output. For a source, it should directly call filter_frame on the corresponding output. For a filter, if there are queued frames already ready, one of these frames should be pushed. If not, the filter should request a frame on one of its inputs, repeatedly until at least one frame has been pushed. Return values: if request_frame could produce a frame, or at least make progress towards producing a frame, ...
void sws_freeContext(struct SwsContext *swsContext)
Free the swscaler context swsContext.
int h
agreed upon image height
static int copy_from_dnn_to_frame(DnnProcessingContext *ctx, AVFrame *frame)
@ AV_PIX_FMT_YUV444P
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
static av_always_inline int isPlanarYUV(enum AVPixelFormat pix_fmt)
#define LOG_FORMAT_CHANNEL_MISMATCH()
@ AV_PIX_FMT_YUV422P
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
@ AV_PIX_FMT_YUV411P
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
@ AV_PIX_FMT_YUV410P
planar YUV 4:1:0, 9bpp, (1 Cr & Cb sample per 4x4 Y samples)
struct SwsContext * sws_uv_scale
static const AVFilterPad dnn_processing_outputs[]
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
static int config_output(AVFilterLink *outlink)