FFmpeg: libavfilter/vf_removelogo.c Source File

00001 /*
00002  * Copyright (c) 2005 Robert Edele <yartrebo@earthlink.net>
00003  * Copyright (c) 2012 Stefano Sabatini
00004  *
00005  * This file is part of FFmpeg.
00006  *
00007  * FFmpeg is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * FFmpeg is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with FFmpeg; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00072 #include "libavutil/imgutils.h"
00073 #include "avfilter.h"
00074 #include "formats.h"
00075 #include "video.h"
00076 #include "bbox.h"
00077 #include "lavfutils.h"
00078 #include "lswsutils.h"
00079 
00080 typedef struct {
00081     /* Stores our collection of masks. The first is for an array of
00082        the second for the y axis, and the third for the x axis. */
00083     int ***mask;
00084     int max_mask_size;
00085     int mask_w, mask_h;
00086 
00087     uint8_t      *full_mask_data;
00088     FFBoundingBox full_mask_bbox;
00089     uint8_t      *half_mask_data;
00090     FFBoundingBox half_mask_bbox;
00091 } RemovelogoContext;
00092 
/**
 * Enlarge a mask size by a quarter (rounded down): x + x/4.
 *
 * The argument is fully parenthesized, so any integer expression may be
 * passed, but it is evaluated twice: do not pass expressions with side
 * effects.
 */
#define apply_mask_fudge_factor(x) (((x) >> 2) + (x))

/**
 * Convert a grayscale mask into a "strength" mask, in place.
 *
 * Every pixel whose value is greater than min_val is considered part of
 * the logo. After the call, each interior logo pixel holds a value that
 * grows with its distance (in 4-connected erosion steps) from the
 * nearest non-logo pixel, enlarged by apply_mask_fudge_factor() and
 * saturated at 255. Pixels on the outermost rows/columns are only
 * binarized to 0 or 1.
 *
 * @param data          mask pixels, modified in place
 * @param linesize      distance in bytes between two consecutive rows
 * @param w             mask width in pixels
 * @param h             mask height in pixels
 * @param min_val       threshold: values > min_val belong to the logo
 * @param max_mask_size set to an upper bound of the values stored in
 *                      the mask (number of erosion passes plus one,
 *                      with the fudge factor applied)
 */
static void convert_mask_to_strength_mask(uint8_t *data, int linesize,
                                          int w, int h, int min_val,
                                          int *max_mask_size)
{
    int x, y;

    /* How many times we've gone through the loop. Used in the
       in-place erosion algorithm and to get us max_mask_size later on. */
    int current_pass = 0;

    /* Binarize: logo pixels become 1, everything else 0. */
    for (y = 0; y < h; y++)
        for (x = 0; x < w; x++)
            data[y * linesize + x] = data[y * linesize + x] > min_val;

    /* For each pass, a pixel that is at least equal to the current
       pass number, and whose four neighbors are too, is incremented.
       The loop stops after the first pass that increments nothing.
       Edge pixels are never incremented (this should be true anyway
       for any sane mask, but it also guarantees that we eventually
       exit). */
    while (1) {
        /* If this doesn't get set by the end of this pass, then we're done. */
        int has_anything_changed = 0;
        current_pass++;

        for (y = 1; y < h - 1; y++) {
            /* Start on the first interior pixel of the row (x == 1), so
               that the neighbor accesses below stay inside the buffer. */
            uint8_t *current_pixel = data + y * linesize + 1;

            for (x = 1; x < w - 1; x++) {
                /* In-place erosion, based on two premises:
                   1 - Any pixel that fails one erosion will fail all
                       future erosions.
                   2 - Only pixels having survived all erosions up to
                       the present will be >= current_pass.
                   It doesn't matter whether a neighbor survived the
                   current pass, failed it, or hasn't been tested yet:
                   using >= instead of == lets the algorithm work in
                   place.
                   Vertical neighbors are one full row (linesize bytes)
                   away, which may differ from w. */
                if (*current_pixel              >= current_pass &&
                    *(current_pixel + 1)        >= current_pass &&
                    *(current_pixel - 1)        >= current_pass &&
                    *(current_pixel + linesize) >= current_pass &&
                    *(current_pixel - linesize) >= current_pass &&
                    /* saturate instead of wrapping the 8-bit value
                       around to 0 on very large masks */
                    *current_pixel < 255) {
                    (*current_pixel)++;
                    has_anything_changed = 1;
                }
                current_pixel++;
            }
        }
        if (!has_anything_changed)
            break;
    }

    /* Apply the fudge factor, which will increase the size of the
     * mask a little to reduce jitter at the cost of more blur. The
     * result is clamped so that it still fits in 8 bits. */
    for (y = 1; y < h - 1; y++) {
        for (x = 1; x < w - 1; x++) {
            int strength = apply_mask_fudge_factor(data[y * linesize + x]);
            data[y * linesize + x] = strength > 255 ? 255 : strength;
        }
    }

    /* As a side-effect, we now know the maximum mask size, which
     * the caller uses to generate the blur masks. Apply the fudge
     * factor to this number too, since enough masks must be generated. */
    *max_mask_size = apply_mask_fudge_factor(current_pass + 1);
}
00191 
00192 static int query_formats(AVFilterContext *ctx)
00193 {
00194     enum PixelFormat pix_fmts[] = { PIX_FMT_YUV420P, PIX_FMT_NONE };
00195     ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
00196     return 0;
00197 }
00198 
00199 static int load_mask(uint8_t **mask, int *w, int *h,
00200                      const char *filename, void *log_ctx)
00201 {
00202     int ret;
00203     enum PixelFormat pix_fmt;
00204     uint8_t *src_data[4], *gray_data[4];
00205     int src_linesize[4], gray_linesize[4];
00206 
00207     /* load image from file */
00208     if ((ret = ff_load_image(src_data, src_linesize, w, h, &pix_fmt, filename, log_ctx)) < 0)
00209         return ret;
00210 
00211     /* convert the image to GRAY8 */
00212     if ((ret = ff_scale_image(gray_data, gray_linesize, *w, *h, PIX_FMT_GRAY8,
00213                               src_data, src_linesize, *w, *h, pix_fmt,
00214                               log_ctx)) < 0)
00215         goto end;
00216 
00217     /* copy mask to a newly allocated array */
00218     *mask = av_malloc(*w * *h);
00219     if (!*mask)
00220         ret = AVERROR(ENOMEM);
00221     av_image_copy_plane(*mask, *w, gray_data[0], gray_linesize[0], *w, *h);
00222 
00223 end:
00224     av_free(src_data[0]);
00225     av_free(gray_data[0]);
00226     return ret;
00227 }
00228 
/**
 * Build the half-size strength mask from the full-size one.
 *
 * Each destination pixel covers a 2x2 block of source pixels and is set
 * to 1 if any of the four is non-zero, 0 otherwise (a logical OR, not an
 * average). The downsampled mask is then converted in place to a
 * strength mask with a threshold of 0.
 *
 * @param src_data      full-size mask, read only
 * @param src_linesize  bytes between two source rows
 * @param dst_data      destination buffer, (src_w/2) x (src_h/2) pixels
 * @param dst_linesize  bytes between two destination rows
 * @param src_w         source width in pixels
 * @param src_h         source height in pixels
 * @param max_mask_size filled in by convert_mask_to_strength_mask()
 */
static void generate_half_size_image(const uint8_t *src_data, int src_linesize,
                                     uint8_t *dst_data, int dst_linesize,
                                     int src_w, int src_h,
                                     int *max_mask_size)
{
    const int half_w = src_w / 2;
    const int half_h = src_h / 2;
    int row, col;

    for (row = 0; row < half_h; row++) {
        /* the two source rows covered by this destination row */
        const uint8_t *upper = src_data + (2 * row)     * src_linesize;
        const uint8_t *lower = src_data + (2 * row + 1) * src_linesize;
        uint8_t       *out   = dst_data + row * dst_linesize;

        for (col = 0; col < half_w; col++) {
            /* || already yields exactly 0 or 1 */
            out[col] = upper[2 * col] || upper[2 * col + 1] ||
                       lower[2 * col] || lower[2 * col + 1];
        }
    }

    convert_mask_to_strength_mask(dst_data, dst_linesize,
                                  half_w, half_h, 0, max_mask_size);
}
00265 
00266 static av_cold int init(AVFilterContext *ctx, const char *args)
00267 {
00268     RemovelogoContext *removelogo = ctx->priv;
00269     int ***mask;
00270     int ret = 0;
00271     int a, b, c, w, h;
00272     int full_max_mask_size, half_max_mask_size;
00273 
00274     if (!args) {
00275         av_log(ctx, AV_LOG_ERROR, "An image file must be specified as argument\n");
00276         return AVERROR(EINVAL);
00277     }
00278 
00279     /* Load our mask image. */
00280     if ((ret = load_mask(&removelogo->full_mask_data, &w, &h, args, ctx)) < 0)
00281         return ret;
00282     removelogo->mask_w = w;
00283     removelogo->mask_h = h;
00284 
00285     convert_mask_to_strength_mask(removelogo->full_mask_data, w, w, h,
00286                                   16, &full_max_mask_size);
00287 
00288     /* Create the scaled down mask image for the chroma planes. */
00289     if (!(removelogo->half_mask_data = av_mallocz(w/2 * h/2)))
00290         return AVERROR(ENOMEM);
00291     generate_half_size_image(removelogo->full_mask_data, w,
00292                              removelogo->half_mask_data, w/2,
00293                              w, h, &half_max_mask_size);
00294 
00295     removelogo->max_mask_size = FFMAX(full_max_mask_size, half_max_mask_size);
00296 
00297     /* Create a circular mask for each size up to max_mask_size. When
00298        the filter is applied, the mask size is determined on a pixel
00299        by pixel basis, with pixels nearer the edge of the logo getting
00300        smaller mask sizes. */
00301     mask = (int ***)av_malloc(sizeof(int **) * (removelogo->max_mask_size + 1));
00302     if (!mask)
00303         return AVERROR(ENOMEM);
00304 
00305     for (a = 0; a <= removelogo->max_mask_size; a++) {
00306         mask[a] = (int **)av_malloc(sizeof(int *) * ((a * 2) + 1));
00307         if (!mask[a])
00308             return AVERROR(ENOMEM);
00309         for (b = -a; b <= a; b++) {
00310             mask[a][b + a] = (int *)av_malloc(sizeof(int) * ((a * 2) + 1));
00311             if (!mask[a][b + a])
00312                 return AVERROR(ENOMEM);
00313             for (c = -a; c <= a; c++) {
00314                 if ((b * b) + (c * c) <= (a * a)) /* Circular 0/1 mask. */
00315                     mask[a][b + a][c + a] = 1;
00316                 else
00317                     mask[a][b + a][c + a] = 0;
00318             }
00319         }
00320     }
00321     removelogo->mask = mask;
00322 
00323     /* Calculate our bounding rectangles, which determine in what
00324      * region the logo resides for faster processing. */
00325     ff_calculate_bounding_box(&removelogo->full_mask_bbox, removelogo->full_mask_data, w, w, h, 0);
00326     ff_calculate_bounding_box(&removelogo->half_mask_bbox, removelogo->half_mask_data, w/2, w/2, h/2, 0);
00327 
00328 #define SHOW_LOGO_INFO(mask_type)                                       \
00329     av_log(ctx, AV_LOG_VERBOSE, #mask_type " x1:%d x2:%d y1:%d y2:%d max_mask_size:%d\n", \
00330            removelogo->mask_type##_mask_bbox.x1, removelogo->mask_type##_mask_bbox.x2, \
00331            removelogo->mask_type##_mask_bbox.y1, removelogo->mask_type##_mask_bbox.y2, \
00332            mask_type##_max_mask_size);
00333     SHOW_LOGO_INFO(full);
00334     SHOW_LOGO_INFO(half);
00335 
00336     return 0;
00337 }
00338 
00339 static int config_props_input(AVFilterLink *inlink)
00340 {
00341     AVFilterContext *ctx = inlink->dst;
00342     RemovelogoContext *removelogo = ctx->priv;
00343 
00344     if (inlink->w != removelogo->mask_w || inlink->h != removelogo->mask_h) {
00345         av_log(ctx, AV_LOG_INFO,
00346                "Mask image size %dx%d does not match with the input video size %dx%d\n",
00347                removelogo->mask_w, removelogo->mask_h, inlink->w, inlink->h);
00348         return AVERROR(EINVAL);
00349     }
00350 
00351     return 0;
00352 }
00353 
/**
 * Compute the replacement value of one logo pixel.
 *
 * The result is the rounded average of all image pixels that lie inside
 * the circular mask centered on (x, y), inside the image, and outside
 * of the logo (strength mask value 0).
 *
 * @param mask           circular 0/1 masks, used as mask[size][row][column]
 * @param mask_data      strength mask; its value at (x, y) selects the
 *                       circular mask size
 * @param mask_linesize  bytes between two rows of mask_data
 * @param image_data     image plane to read from (only read here)
 * @param image_linesize bytes between two rows of image_data
 * @param w              plane width in pixels
 * @param h              plane height in pixels
 * @param x              column of the pixel to compute
 * @param y              row of the pixel to compute
 * @return the blurred value, or 255 when no usable pixel was found
 */
static unsigned int blur_pixel(int ***mask,
                               const uint8_t *mask_data, int mask_linesize,
                               uint8_t       *image_data, int image_linesize,
                               int w, int h, int x, int y)
{
    /* Mask size tells how large a circle to use. The radius is about
     * (slightly larger than) mask size. */
    const int mask_size = mask_data[y * mask_linesize + x];

    /* Top-left corner of the unclipped window. Coordinates inside the
     * circular mask are relative to this point, NOT to the clipped
     * window start: otherwise the circle would be shifted off-center
     * for pixels close to the top or left image border. */
    const int origin_x = x - mask_size;
    const int origin_y = y - mask_size;

    /* Bounding rectangle of the circle, clipped to the image. */
    const int start_posx = origin_x < 0 ? 0 : origin_x;
    const int start_posy = origin_y < 0 ? 0 : origin_y;
    const int end_posx   = x + mask_size > w - 1 ? w - 1 : x + mask_size;
    const int end_posy   = y + mask_size > h - 1 ? h - 1 : y + mask_size;

    unsigned int accumulator = 0, divisor = 0;
    int i, j;

    for (j = start_posy; j <= end_posy; j++) {
        const uint8_t *image_row  = image_data + image_linesize * j;
        const uint8_t *mask_row   = mask_data  + mask_linesize  * j;
        const int     *circle_row = mask[mask_size][j - origin_y];

        for (i = start_posx; i <= end_posx; i++) {
            /* Only use the pixel if it is not part of the logo and
             * lies within the circle. */
            if (!mask_row[i] && circle_row[i - origin_x]) {
                accumulator += image_row[i];
                divisor++;
            }
        }
    }

    /* If divisor is 0, it means that not a single pixel is outside of
       the logo, so we have no data.  Else we need to normalise the
       data using the divisor, adding half of it first so that the
       integer division rounds to nearest. */
    return divisor == 0 ? 255 :
        (accumulator + (divisor / 2)) / divisor;
}
00417 
00441 static void blur_image(int ***mask,
00442                        const uint8_t *src_data,  int src_linesize,
00443                              uint8_t *dst_data,  int dst_linesize,
00444                        const uint8_t *mask_data, int mask_linesize,
00445                        int w, int h, int direct,
00446                        FFBoundingBox *bbox)
00447 {
00448     int x, y;
00449     uint8_t *dst_line;
00450     const uint8_t *src_line;
00451 
00452     if (!direct)
00453         av_image_copy_plane(dst_data, dst_linesize, src_data, src_linesize, w, h);
00454 
00455     for (y = bbox->y1; y <= bbox->y2; y++) {
00456         src_line = src_data + src_linesize * y;
00457         dst_line = dst_data + dst_linesize * y;
00458 
00459         for (x = bbox->x1; x <= bbox->x2; x++) {
00460              if (mask_data[y * mask_linesize + x]) {
00461                 /* Only process if we are in the mask. */
00462                  dst_line[x] = blur_pixel(mask,
00463                                           mask_data, mask_linesize,
00464                                           dst_data, dst_linesize,
00465                                           w, h, x, y);
00466             } else {
00467                 /* Else just copy the data. */
00468                 if (!direct)
00469                     dst_line[x] = src_line[x];
00470             }
00471         }
00472     }
00473 }
00474 
00475 static int start_frame(AVFilterLink *inlink, AVFilterBufferRef *inpicref)
00476 {
00477     AVFilterLink *outlink = inlink->dst->outputs[0];
00478     AVFilterBufferRef *outpicref;
00479 
00480     outpicref = inpicref;
00481 
00482     outlink->out_buf = outpicref;
00483     return ff_start_frame(outlink, avfilter_ref_buffer(outpicref, ~0));
00484 }
00485 
00486 static int end_frame(AVFilterLink *inlink)
00487 {
00488     RemovelogoContext *removelogo = inlink->dst->priv;
00489     AVFilterLink *outlink = inlink->dst->outputs[0];
00490     AVFilterBufferRef *inpicref  = inlink ->cur_buf;
00491     AVFilterBufferRef *outpicref = outlink->out_buf;
00492     int direct = inpicref == outpicref;
00493 
00494     blur_image(removelogo->mask,
00495                inpicref ->data[0], inpicref ->linesize[0],
00496                outpicref->data[0], outpicref->linesize[0],
00497                removelogo->full_mask_data, inlink->w,
00498                inlink->w, inlink->h, direct, &removelogo->full_mask_bbox);
00499     blur_image(removelogo->mask,
00500                inpicref ->data[1], inpicref ->linesize[1],
00501                outpicref->data[1], outpicref->linesize[1],
00502                removelogo->half_mask_data, inlink->w/2,
00503                inlink->w/2, inlink->h/2, direct, &removelogo->half_mask_bbox);
00504     blur_image(removelogo->mask,
00505                inpicref ->data[2], inpicref ->linesize[2],
00506                outpicref->data[2], outpicref->linesize[2],
00507                removelogo->half_mask_data, inlink->w/2,
00508                inlink->w/2, inlink->h/2, direct, &removelogo->half_mask_bbox);
00509 
00510     ff_draw_slice(outlink, 0, inlink->h, 1);
00511     return ff_end_frame(outlink);
00512 }
00513 
00514 static void uninit(AVFilterContext *ctx)
00515 {
00516     RemovelogoContext *removelogo = ctx->priv;
00517     int a, b;
00518 
00519     av_freep(&removelogo->full_mask_data);
00520     av_freep(&removelogo->half_mask_data);
00521 
00522     if (removelogo->mask) {
00523         /* Loop through each mask. */
00524         for (a = 0; a <= removelogo->max_mask_size; a++) {
00525             /* Loop through each scanline in a mask. */
00526             for (b = -a; b <= a; b++) {
00527                 av_free(removelogo->mask[a][b + a]); /* Free a scanline. */
00528             }
00529             av_free(removelogo->mask[a]);
00530         }
00531         /* Free the array of pointers pointing to the masks. */
00532         av_freep(&removelogo->mask);
00533     }
00534 }
00535 
00536 static int null_draw_slice(AVFilterLink *link, int y, int h, int slice_dir) { return 0; }
00537 
00538 AVFilter avfilter_vf_removelogo = {
00539     .name          = "removelogo",
00540     .description   = NULL_IF_CONFIG_SMALL("Remove a TV logo based on a mask image."),
00541     .priv_size     = sizeof(RemovelogoContext),
00542     .init          = init,
00543     .uninit        = uninit,
00544     .query_formats = query_formats,
00545 
00546     .inputs = (const AVFilterPad[]) {
00547         { .name             = "default",
00548           .type             = AVMEDIA_TYPE_VIDEO,
00549           .get_video_buffer = ff_null_get_video_buffer,
00550           .config_props     = config_props_input,
00551           .draw_slice       = null_draw_slice,
00552           .start_frame      = start_frame,
00553           .end_frame        = end_frame,
00554           .min_perms        = AV_PERM_WRITE | AV_PERM_READ },
00555         { .name = NULL }
00556     },
00557     .outputs = (const AVFilterPad[]) {
00558         { .name             = "default",
00559           .type             = AVMEDIA_TYPE_VIDEO, },
00560         { .name = NULL }
00561     },
00562 };