doxygen/trunk/vf__pad__cuda_8c_source.html

/*

 * This file is part of FFmpeg.

 *

 * FFmpeg is free software; you can redistribute it and/or

 * modify it under the terms of the GNU Lesser General Public

 * License as published by the Free Software Foundation; either

 * version 2.1 of the License, or (at your option) any later version.

 *

 * FFmpeg is distributed in the hope that it will be useful,

 * but WITHOUT ANY WARRANTY; without even the implied warranty of

 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

 * Lesser General Public License for more details.

 *

 * You should have received a copy of the GNU Lesser General Public

 * License along with FFmpeg; if not, write to the Free Software

 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

 */


/**

 * @file

 * CUDA video padding filter

 */


#include <float.h>


#include "filters.h"

#include "libavutil/avstring.h"

#include "libavutil/common.h"

#include "libavutil/cuda_check.h"

#include "libavutil/eval.h"

#include "libavutil/hwcontext.h"

#include "libavutil/hwcontext_cuda_internal.h"

#include "libavutil/imgutils.h"

#include "libavutil/internal.h"

#include "libavutil/opt.h"

#include "libavutil/pixdesc.h"

#include "libavutil/colorspace.h"


#include "cuda/load_helper.h"


#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, device_hwctx->internal->cuda_dl, x)

#define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )

#define BLOCK_X 32

#define BLOCK_Y 16


static const enum AVPixelFormat supported_formats[] = {

    AV_PIX_FMT_YUV420P,

    AV_PIX_FMT_YUV444P,

    AV_PIX_FMT_YUVA420P,

    AV_PIX_FMT_YUVA444P,

    AV_PIX_FMT_NV12,

};


typedef struct CUDAPadContext {

    const AVClass *class;


    AVBufferRef *frames_ctx;


    int w, h;       ///< output dimensions, a value of 0 will result in the input size

    int x, y;       ///< offsets of the input area with respect to the padded area

    int in_w, in_h; ///< width and height for the padded input video


    char *w_expr;   ///< width expression

    char *h_expr;   ///< height expression

    char *x_expr;   ///< x offset expression

    char *y_expr;   ///< y offset expression


    uint8_t rgba_color[4];    ///< color for the padding area

    uint8_t parsed_color[4];

    AVRational aspect;


    int eval_mode;


    int last_out_w, last_out_h; ///< used to evaluate the prior output width and height with the incoming frame


    AVCUDADeviceContext *hwctx;

    CUmodule cu_module;

    CUfunction cu_func_uchar;

    CUfunction cu_func_uchar2;

} CUDAPadContext;


static const char *const var_names[] = {

    "in_w",  "iw",

    "in_h",  "ih",

    "out_w", "ow",

    "out_h", "oh",

    "x",

    "y",

    "a",

    "sar",

    "dar",

    "hsub",

    "vsub",

    NULL

};


enum {

    VAR_IN_W,

    VAR_IW,

    VAR_IN_H,

    VAR_IH,

    VAR_OUT_W,

    VAR_OW,

    VAR_OUT_H,

    VAR_OH,

    VAR_X,

    VAR_Y,

    VAR_A,

    VAR_SAR,

    VAR_DAR,

    VAR_HSUB,

    VAR_VSUB,

    VARS_NB

};


enum EvalMode {

    EVAL_MODE_INIT,

    EVAL_MODE_FRAME,

    EVAL_MODE_NB

};


static int eval_expr(AVFilterContext *ctx)

{

    CUDAPadContext *s = ctx->priv;

    AVFilterLink *inlink = ctx->inputs[0];

    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);


    double var_values[VARS_NB], res;

    char *expr;

    int ret;


    var_values[VAR_IN_W]   = var_values[VAR_IW]   = s->in_w;

    var_values[VAR_IN_H]   = var_values[VAR_IH]   = s->in_h;

    var_values[VAR_OUT_W]  = var_values[VAR_OW]  = NAN;

    var_values[VAR_OUT_H]  = var_values[VAR_OH]  = NAN;

    var_values[VAR_A]      = (double)s->in_w / s->in_h;

    var_values[VAR_SAR]    = inlink->sample_aspect_ratio.num ?

                             (double)inlink->sample_aspect_ratio.num /

                             inlink->sample_aspect_ratio.den : 1;

    var_values[VAR_DAR]    = var_values[VAR_A] * var_values[VAR_SAR];

    var_values[VAR_HSUB]   = 1 << desc->log2_chroma_w;

    var_values[VAR_VSUB]   = 1 << desc->log2_chroma_h;


    expr = s->w_expr;

    ret = av_expr_parse_and_eval(&res, expr, var_names, var_values, NULL, NULL, NULL, NULL, NULL, 0, ctx);

    if (ret < 0)

        goto fail;


    s->w = res;

    if (s->w < 0) {

        av_log(ctx, AV_LOG_ERROR, "Width expression is negative.\n");

        ret = AVERROR(EINVAL);

        goto fail;

    }


    var_values[VAR_OUT_W] = var_values[VAR_OW] = s->w;


    expr = s->h_expr;

    ret = av_expr_parse_and_eval(&res, expr, var_names, var_values, NULL, NULL, NULL, NULL, NULL, 0, ctx);

    if (ret < 0)

        goto fail;


    s->h = res;

    if (s->h < 0) {

        av_log(ctx, AV_LOG_ERROR, "Height expression is negative.\n");

        ret = AVERROR(EINVAL);

        goto fail;

    }

    var_values[VAR_OUT_H] = var_values[VAR_OH] = s->h;


    if (!s->h)

        s->h = s->in_h;


    var_values[VAR_OUT_H] = var_values[VAR_OH] = s->h;


    expr = s->w_expr;

    ret = av_expr_parse_and_eval(&res, expr, var_names, var_values, NULL, NULL, NULL, NULL, NULL, 0, ctx);

    if (ret < 0)

        goto fail;


    s->w = res;

    if (s->w < 0) {

        av_log(ctx, AV_LOG_ERROR, "Width expression is negative.\n");

        ret = AVERROR(EINVAL);

        goto fail;

    }

    if (!s->w)

        s->w = s->in_w;


    var_values[VAR_OUT_W] = var_values[VAR_OW] = s->w;


    expr = s->x_expr;

    ret = av_expr_parse_and_eval(&res, expr, var_names, var_values, NULL, NULL, NULL, NULL, NULL, 0, ctx);

    if (ret < 0)

        goto fail;


    s->x = res;


    expr = s->y_expr;

    ret = av_expr_parse_and_eval(&res, expr, var_names, var_values, NULL, NULL, NULL, NULL, NULL, 0, ctx);

    if (ret < 0)

        goto fail;


    s->y = res;


    if (s->x < 0 || s->x + s->in_w > s->w) {

        s->x = (s->w - s->in_w) / 2;

        av_log(ctx, AV_LOG_VERBOSE, "centering X offset.\n");

    }


    if (s->y < 0 || s->y + s->in_h > s->h) {

        s->y = (s->h - s->in_h) / 2;

        av_log(ctx, AV_LOG_VERBOSE, "centering Y offset.\n");

    }


    s->w = av_clip(s->w, 1, INT_MAX);

    s->h = av_clip(s->h, 1, INT_MAX);


    if (s->w < s->in_w || s->h < s->in_h) {

        av_log(ctx, AV_LOG_ERROR, "Padded size < input size.\n");

        return AVERROR(EINVAL);

    }


    av_log(ctx, AV_LOG_DEBUG,

           "w:%d h:%d -> w:%d h:%d x:%d y:%d color:0x%02X%02X%02X%02X\n",

           inlink->w, inlink->h, s->w, s->h, s->x, s->y, s->rgba_color[0],

           s->rgba_color[1], s->rgba_color[2], s->rgba_color[3]);


    return 0;


fail:

    av_log(ctx, AV_LOG_ERROR, "Error evaluating '%s'\n", expr);

    return ret;

}


static int cuda_pad_alloc_out_frames_ctx(AVFilterContext *ctx, AVBufferRef **out_frames_ctx, const int width, const int height)

{

    AVFilterLink *inlink = ctx->inputs[0];

    FilterLink *inl = ff_filter_link(inlink);

    AVHWFramesContext *in_frames_ctx = (AVHWFramesContext *)inl->hw_frames_ctx->data;

    int ret;


    *out_frames_ctx = av_hwframe_ctx_alloc(in_frames_ctx->device_ref);

    if (!*out_frames_ctx) {

        return AVERROR(ENOMEM);

    }


    AVHWFramesContext *out_fc = (AVHWFramesContext *)(*out_frames_ctx)->data;

    out_fc->format    = AV_PIX_FMT_CUDA;

    out_fc->sw_format = in_frames_ctx->sw_format;


    out_fc->width     = FFALIGN(width, 32);

    out_fc->height    = FFALIGN(height, 32);


    ret = av_hwframe_ctx_init(*out_frames_ctx);

    if (ret < 0) {

        av_log(ctx, AV_LOG_ERROR, "Failed to init output ctx\n");

        av_buffer_unref(out_frames_ctx);

        return ret;

    }


    return 0;

}


static av_cold int cuda_pad_init(AVFilterContext *ctx)

{

    CUDAPadContext *s = ctx->priv;


    s->last_out_w = -1;

    s->last_out_h = -1;


    return 0;

}


static av_cold void cuda_pad_uninit(AVFilterContext *ctx)

{

    CUDAPadContext *s = ctx->priv;

    CUcontext dummy;


    av_buffer_unref(&s->frames_ctx);


    if (s->hwctx && s->cu_module) {

        CudaFunctions *cu = s->hwctx->internal->cuda_dl;

        AVCUDADeviceContext *device_hwctx = s->hwctx;

        CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx));

        CHECK_CU(cu->cuModuleUnload(s->cu_module));

        CHECK_CU(cu->cuCtxPopCurrent(&dummy));

    }


    s->cu_module = NULL;

    s->hwctx = NULL;

}


static av_cold int cuda_pad_load_functions(AVFilterContext *ctx)

{

    CUDAPadContext *s = ctx->priv;

    CudaFunctions *cu = s->hwctx->internal->cuda_dl;

    CUcontext dummy_cu_ctx;

    int ret;


    AVCUDADeviceContext *device_hwctx = s->hwctx;


    extern const unsigned char ff_vf_pad_cuda_ptx_data[];

    extern const unsigned int ff_vf_pad_cuda_ptx_len;


    ret = CHECK_CU(cu->cuCtxPushCurrent(device_hwctx->cuda_ctx));

    if (ret < 0)

        return ret;


    ret = ff_cuda_load_module(ctx, device_hwctx, &s->cu_module,

                              ff_vf_pad_cuda_ptx_data, ff_vf_pad_cuda_ptx_len);

    if (ret < 0) {

        av_log(ctx, AV_LOG_ERROR, "Failed to load CUDA module\n");

        goto end;

    }


    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "pad_uchar"));

    if (ret < 0) {

        av_log(ctx, AV_LOG_ERROR, "Failed to load pad_planar_cuda\n");

        goto end;

    }


    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "pad_uchar2"));

    if (ret < 0)

        av_log(ctx, AV_LOG_ERROR, "Failed to load pad_uv_cuda\n");


end:

    CHECK_CU(cu->cuCtxPopCurrent(&dummy_cu_ctx));


    return ret;

}


static int cuda_pad_config_props(AVFilterLink *outlink)

{

    AVFilterContext *ctx = outlink->src;

    CUDAPadContext *s = ctx->priv;


    AVFilterLink *inlink = ctx->inputs[0];

    FilterLink *inl = ff_filter_link(inlink);


    FilterLink *ol = ff_filter_link(outlink);


    AVHWFramesContext *in_frames_ctx;

    int format_supported = 0;

    int ret;


    s->in_w = inlink->w;

    s->in_h = inlink->h;

    ret = eval_expr(ctx);

    if (ret < 0)

        return ret;


    if (!inl->hw_frames_ctx) {

        av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n");

        return AVERROR(EINVAL);

    }


    in_frames_ctx = (AVHWFramesContext *)inl->hw_frames_ctx->data;

    s->hwctx = in_frames_ctx->device_ctx->hwctx;


    for (int i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {

        if (in_frames_ctx->sw_format == supported_formats[i]) {

            format_supported = 1;

            break;

        }

    }

    if (!format_supported) {

        av_log(ctx, AV_LOG_ERROR, "Unsupported input format.\n");

        return AVERROR(EINVAL);

    }


    s->parsed_color[0] = RGB_TO_Y_BT709(s->rgba_color[0], s->rgba_color[1], s->rgba_color[2]);

    s->parsed_color[1] = RGB_TO_U_BT709(s->rgba_color[0], s->rgba_color[1], s->rgba_color[2], 0);

    s->parsed_color[2] = RGB_TO_V_BT709(s->rgba_color[0], s->rgba_color[1], s->rgba_color[2], 0);

    s->parsed_color[3] = s->rgba_color[3];


    ret = cuda_pad_alloc_out_frames_ctx(ctx, &s->frames_ctx, s->w, s->h);

    if (ret < 0)

        return ret;


    ol->hw_frames_ctx = av_buffer_ref(s->frames_ctx);

    if (!ol->hw_frames_ctx)

        return AVERROR(ENOMEM);


    outlink->w         = s->w;

    outlink->h         = s->h;

    outlink->time_base = inlink->time_base;

    outlink->format    = AV_PIX_FMT_CUDA;


    s->last_out_w = s->w;

    s->last_out_h = s->h;


    ret = cuda_pad_load_functions(ctx);

    if (ret < 0)

        return ret;


    return 0;

}


static int cuda_pad_pad(AVFilterContext *ctx, AVFrame *out, const AVFrame *in)

{

    CUDAPadContext *s = ctx->priv;

    FilterLink *inl = ff_filter_link(ctx->inputs[0]);


    AVHWFramesContext *in_frames_ctx = (AVHWFramesContext *)inl->hw_frames_ctx->data;

    const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(in_frames_ctx->sw_format);


    CudaFunctions *cu = s->hwctx->internal->cuda_dl;

    AVCUDADeviceContext *device_hwctx = s->hwctx;

    int ret;


    const int nb_planes = av_pix_fmt_count_planes(in_frames_ctx->sw_format);

    for (int plane = 0; plane < nb_planes; plane++) {

        const AVComponentDescriptor *cur_comp = &pixdesc->comp[0];

        for (int comp = 1; comp < pixdesc->nb_components && cur_comp->plane != plane; comp++)

            cur_comp = &pixdesc->comp[comp];


        int hsub = (plane == 1 || plane == 2) ? pixdesc->log2_chroma_w : 0;

        int vsub = (plane == 1 || plane == 2) ? pixdesc->log2_chroma_h : 0;


        int src_w = AV_CEIL_RSHIFT(s->in_w, hsub);

        int src_h = AV_CEIL_RSHIFT(s->in_h, vsub);


        int dst_w = AV_CEIL_RSHIFT(s->w, hsub);

        int dst_h = AV_CEIL_RSHIFT(s->h, vsub);


        int y_plane_offset = AV_CEIL_RSHIFT(s->y, vsub);

        int x_plane_offset = AV_CEIL_RSHIFT(s->x, hsub);


        if (x_plane_offset + src_w > dst_w || y_plane_offset + src_h > dst_h) {

            av_log(ctx, AV_LOG_ERROR,

                   "ROI out of bounds in plane %d: offset=(%d,%d) in=(%dx%d) "

                   "out=(%dx%d)\n",

                   plane, x_plane_offset, y_plane_offset, src_w, src_h, dst_w, dst_h);

            return AVERROR(EINVAL);

        }


        int dst_linesize = out->linesize[plane] / cur_comp->step;

        int src_linesize = in->linesize[plane] / cur_comp->step;


        CUdeviceptr d_dst = (CUdeviceptr)out->data[plane];

        CUdeviceptr d_src = (CUdeviceptr)in->data[plane];


        CUfunction cuda_func;


        if (cur_comp->step == 1 && cur_comp->depth == 8)

            cuda_func = s->cu_func_uchar;

        else if(cur_comp->step == 2 && cur_comp->depth == 8)

            cuda_func = s->cu_func_uchar2;

        else

            return AVERROR_BUG;


        void *kernel_args[] = {

            &d_dst, &dst_linesize, &dst_w, &dst_h,

            &d_src, &src_linesize, &src_w, &src_h,

            &x_plane_offset, &y_plane_offset, &s->parsed_color[plane]

        };


        unsigned int grid_x = DIV_UP(dst_w, BLOCK_X);

        unsigned int grid_y = DIV_UP(dst_h, BLOCK_Y);


        ret = CHECK_CU(cu->cuLaunchKernel(cuda_func, grid_x, grid_y, 1,

                                          BLOCK_X, BLOCK_Y, 1,

                                          0, s->hwctx->stream, kernel_args, NULL));


        if (ret < 0) {

            av_log(ctx, AV_LOG_ERROR, "Failed to launch kernel for plane %d\n", plane);

            return ret;

        }

    }


    return 0;

}


static int cuda_pad_filter_frame(AVFilterLink *inlink, AVFrame *in)

{

    AVFilterContext *ctx = inlink->dst;

    CUDAPadContext *s = ctx->priv;

    AVFilterLink *outlink = ctx->outputs[0];


    FilterLink *outl = ff_filter_link(outlink);


    AVHWFramesContext *out_frames_ctx = (AVHWFramesContext *)outl->hw_frames_ctx->data;

    AVCUDADeviceContext *device_hwctx = out_frames_ctx->device_ctx->hwctx;


    int ret;


    if (s->eval_mode == EVAL_MODE_FRAME) {

        s->in_w   = in->width;

        s->in_h   = in->height;

        s->aspect = in->sample_aspect_ratio;


        ret = eval_expr(ctx);

        if (ret < 0) {

            av_frame_free(&in);

            return ret;

        }

    }


    if (s->x == 0 && s->y == 0 &&

        s->w == in->width && s->h == in->height) {

        av_log(ctx, AV_LOG_DEBUG, "No border. Passing the frame unmodified.\n");

        s->last_out_w = s->w;

        s->last_out_h = s->h;

        return ff_filter_frame(outlink, in);

    }


    if (s->w != s->last_out_w || s->h != s->last_out_h) {


        av_buffer_unref(&s->frames_ctx);


        ret = cuda_pad_alloc_out_frames_ctx(ctx, &s->frames_ctx, s->w, s->h);

        if (ret < 0)

            return ret;


        av_buffer_unref(&outl->hw_frames_ctx);

        outl->hw_frames_ctx = av_buffer_ref(s->frames_ctx);

        if (!outl->hw_frames_ctx) {

            av_frame_free(&in);

            av_log(ctx, AV_LOG_ERROR, "Failed to allocate output frame context.\n");

            return AVERROR(ENOMEM);

        }

        outlink->w = s->w;

        outlink->h = s->h;


        s->last_out_w = s->w;

        s->last_out_h = s->h;

    }


    AVFrame *out = av_frame_alloc();

    if (!out) {

        av_frame_free(&in);

        av_log(ctx, AV_LOG_ERROR, "Failed to allocate output AVFrame.\n");

        return AVERROR(ENOMEM);

    }

    ret = av_hwframe_get_buffer(outl->hw_frames_ctx, out, 0);

    if (ret < 0) {

        av_log(ctx, AV_LOG_ERROR, "Unable to get output buffer: %s\n",

               av_err2str(ret));

        av_frame_free(&out);

        av_frame_free(&in);

        return ret;

    }


    CUcontext dummy;

    ret = CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPushCurrent(

        device_hwctx->cuda_ctx));

    if (ret < 0) {

        av_frame_free(&out);

        av_frame_free(&in);

        return ret;

    }


    ret = cuda_pad_pad(ctx, out, in);


    CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy));


    if (ret < 0) {

        av_frame_free(&out);

        av_frame_free(&in);

        return ret;

    }


    av_frame_copy_props(out, in);

    out->width  = s->w;

    out->height = s->h;


    av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,

              (int64_t)in->sample_aspect_ratio.num * out->height * in->width,

              (int64_t)in->sample_aspect_ratio.den * out->width * in->height,

              INT_MAX);


    av_frame_free(&in);

    return ff_filter_frame(outlink, out);

}


#define OFFSET(x) offsetof(CUDAPadContext, x)

#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)


static const AVOption cuda_pad_options[] = {

    { "width",  "set the pad area width expression",                             OFFSET(w_expr),     AV_OPT_TYPE_STRING,   {.str = "iw"},       0, 0,        FLAGS },

    { "w",      "set the pad area width expression",                             OFFSET(w_expr),     AV_OPT_TYPE_STRING,   {.str = "iw"},       0, 0,        FLAGS },

    { "height", "set the pad area height expression",                            OFFSET(h_expr),     AV_OPT_TYPE_STRING,   {.str = "ih"},       0, 0,        FLAGS },

    { "h",      "set the pad area height expression",                            OFFSET(h_expr),     AV_OPT_TYPE_STRING,   {.str = "ih"},       0, 0,        FLAGS },

    { "x",      "set the x offset expression for the input image position",      OFFSET(x_expr),     AV_OPT_TYPE_STRING,   {.str = "0"},        0, 0,        FLAGS },

    { "y",      "set the y offset expression for the input image position",      OFFSET(y_expr),     AV_OPT_TYPE_STRING,   {.str = "0"},        0, 0,        FLAGS },

    { "color",  "set the color of the padded area border",                       OFFSET(rgba_color), AV_OPT_TYPE_COLOR,    {.str = "black"},    .flags =      FLAGS },

    { "eval",   "specify when to evaluate expressions",                          OFFSET(eval_mode),  AV_OPT_TYPE_INT,      {.i64 = EVAL_MODE_INIT}, 0, EVAL_MODE_NB-1, FLAGS, .unit = "eval" },

         { "init",  "eval expressions once during initialization", 0, AV_OPT_TYPE_CONST, {.i64=EVAL_MODE_INIT},  .flags = FLAGS, .unit = "eval" },

         { "frame", "eval expressions during initialization and per-frame", 0, AV_OPT_TYPE_CONST, {.i64=EVAL_MODE_FRAME}, .flags = FLAGS, .unit = "eval" },

    { "aspect", "pad to fit an aspect instead of a resolution",                  OFFSET(aspect),     AV_OPT_TYPE_RATIONAL, {.dbl = 0},        0, DBL_MAX,    FLAGS },

    { NULL }

};


static const AVClass cuda_pad_class = {

    .class_name = "pad_cuda",

    .item_name  = av_default_item_name,

    .option     = cuda_pad_options,

    .version    = LIBAVUTIL_VERSION_INT,

};


static const AVFilterPad cuda_pad_inputs[] = {{

        .name         = "default",

        .type         = AVMEDIA_TYPE_VIDEO,

        .filter_frame = cuda_pad_filter_frame

}};


static const AVFilterPad cuda_pad_outputs[] = {{

        .name         = "default",

        .type         = AVMEDIA_TYPE_VIDEO,

        .config_props = cuda_pad_config_props,

}};


const FFFilter ff_vf_pad_cuda = {

    .p.name         = "pad_cuda",

    .p.description  = NULL_IF_CONFIG_SMALL("CUDA-based GPU padding filter"),

    .init           = cuda_pad_init,

    .uninit         = cuda_pad_uninit,


    .p.priv_class   = &cuda_pad_class,


    FILTER_INPUTS(cuda_pad_inputs),

    FILTER_OUTPUTS(cuda_pad_outputs),


    FILTER_SINGLE_PIXFMT(AV_PIX_FMT_CUDA),


    .priv_size      = sizeof(CUDAPadContext),

    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,

};