FFmpeg
vf_deshake.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2010 Georg Martius <georg.martius@web.de>
3  * Copyright (C) 2010 Daniel G. Taylor <dan@programmer-art.org>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * fast deshake / depan video filter
25  *
26  * SAD block-matching motion compensation to fix small changes in
27  * horizontal and/or vertical shift. This filter helps remove camera shake
28  * from hand-holding a camera, bumping a tripod, moving on a vehicle, etc.
29  *
30  * Algorithm:
31  * - For each frame with one previous reference frame
32  * - For each block in the frame
33  * - If contrast > threshold then find likely motion vector
34  * - For all found motion vectors
35  * - Find most common, store as global motion vector
36  * - Find most likely rotation angle
37  * - Transform image along global motion
38  *
39  * TODO:
40  * - Fill frame edges based on previous/next reference frames
41  * - Fill frame edges by stretching image near the edges?
42  * - Can this be done quickly and look decent?
43  *
44  * Dark Shikari links to http://wiki.videolan.org/SoC_x264_2010#GPU_Motion_Estimation_2
45  * for an algorithm similar to what could be used here to get the gmv
46  * It requires only a couple diamond searches + fast downscaling
47  *
48  * Special thanks to Jason Kotenko for his help with the algorithm and my
49  * inability to see simple errors in C code.
50  */
51 
52 #include "avfilter.h"
53 #include "internal.h"
54 #include "transform.h"
55 #include "video.h"
56 #include "libavutil/common.h"
57 #include "libavutil/file_open.h"
58 #include "libavutil/mem.h"
59 #include "libavutil/opt.h"
60 #include "libavutil/pixdesc.h"
61 #include "libavutil/pixelutils.h"
62 #include "libavutil/qsort.h"
63 
64 
66  EXHAUSTIVE, ///< Search all possible positions
67  SMART_EXHAUSTIVE, ///< Search most possible positions (faster)
69 };
70 
71 typedef struct IntMotionVector {
72  int x; ///< Horizontal shift
73  int y; ///< Vertical shift
75 
/**
 * Motion vector with double-precision components.
 */
typedef struct MotionVector {
    double x;   ///< Horizontal shift
    double y;   ///< Vertical shift
} MotionVector;
80 
81 typedef struct Transform {
82  MotionVector vec; ///< Motion vector
83  double angle; ///< Angle of rotation
84  double zoom; ///< Zoom percentage
85 } Transform;
86 
87 #define MAX_R 64
88 
89 typedef struct DeshakeContext {
90  const AVClass *class;
91  int counts[2*MAX_R+1][2*MAX_R+1]; ///< Scratch buffer for motion search
92  double *angles; ///< Scratch buffer for block angles
93  unsigned angles_size;
94  AVFrame *ref; ///< Previous frame
95  int rx; ///< Maximum horizontal shift
96  int ry; ///< Maximum vertical shift
97  int edge; ///< Edge fill method
98  int blocksize; ///< Size of blocks to compare
99  int contrast; ///< Contrast threshold
100  int search; ///< Motion search method
101  av_pixelutils_sad_fn sad; ///< Sum of the absolute difference function
102  Transform last; ///< Transform from last frame
103  int refcount; ///< Number of reference frames (defines averaging window)
104  FILE *fp;
106  int cw; ///< Crop motion search to this box
107  int ch;
108  int cx;
109  int cy;
110  char *filename; ///< Motion search detailed log filename
111  int opencl;
112  int (* transform)(AVFilterContext *ctx, int width, int height, int cw, int ch,
113  const float *matrix_y, const float *matrix_uv, enum InterpolateMethod interpolate,
114  enum FillMethod fill, AVFrame *in, AVFrame *out);
116 
117 #define OFFSET(x) offsetof(DeshakeContext, x)
118 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
119 
120 static const AVOption deshake_options[] = {
121  { "x", "set x for the rectangular search area", OFFSET(cx), AV_OPT_TYPE_INT, {.i64=-1}, -1, INT_MAX, .flags = FLAGS },
122  { "y", "set y for the rectangular search area", OFFSET(cy), AV_OPT_TYPE_INT, {.i64=-1}, -1, INT_MAX, .flags = FLAGS },
123  { "w", "set width for the rectangular search area", OFFSET(cw), AV_OPT_TYPE_INT, {.i64=-1}, -1, INT_MAX, .flags = FLAGS },
124  { "h", "set height for the rectangular search area", OFFSET(ch), AV_OPT_TYPE_INT, {.i64=-1}, -1, INT_MAX, .flags = FLAGS },
125  { "rx", "set x for the rectangular search area", OFFSET(rx), AV_OPT_TYPE_INT, {.i64=16}, 0, MAX_R, .flags = FLAGS },
126  { "ry", "set y for the rectangular search area", OFFSET(ry), AV_OPT_TYPE_INT, {.i64=16}, 0, MAX_R, .flags = FLAGS },
127  { "edge", "set edge mode", OFFSET(edge), AV_OPT_TYPE_INT, {.i64=FILL_MIRROR}, FILL_BLANK, FILL_COUNT-1, FLAGS, .unit = "edge"},
128  { "blank", "fill zeroes at blank locations", 0, AV_OPT_TYPE_CONST, {.i64=FILL_BLANK}, INT_MIN, INT_MAX, FLAGS, .unit = "edge" },
129  { "original", "original image at blank locations", 0, AV_OPT_TYPE_CONST, {.i64=FILL_ORIGINAL}, INT_MIN, INT_MAX, FLAGS, .unit = "edge" },
130  { "clamp", "extruded edge value at blank locations", 0, AV_OPT_TYPE_CONST, {.i64=FILL_CLAMP}, INT_MIN, INT_MAX, FLAGS, .unit = "edge" },
131  { "mirror", "mirrored edge at blank locations", 0, AV_OPT_TYPE_CONST, {.i64=FILL_MIRROR}, INT_MIN, INT_MAX, FLAGS, .unit = "edge" },
132  { "blocksize", "set motion search blocksize", OFFSET(blocksize), AV_OPT_TYPE_INT, {.i64=8}, 4, 128, .flags = FLAGS },
133  { "contrast", "set contrast threshold for blocks", OFFSET(contrast), AV_OPT_TYPE_INT, {.i64=125}, 1, 255, .flags = FLAGS },
134  { "search", "set search strategy", OFFSET(search), AV_OPT_TYPE_INT, {.i64=EXHAUSTIVE}, EXHAUSTIVE, SEARCH_COUNT-1, FLAGS, .unit = "smode" },
135  { "exhaustive", "exhaustive search", 0, AV_OPT_TYPE_CONST, {.i64=EXHAUSTIVE}, INT_MIN, INT_MAX, FLAGS, .unit = "smode" },
136  { "less", "less exhaustive search", 0, AV_OPT_TYPE_CONST, {.i64=SMART_EXHAUSTIVE}, INT_MIN, INT_MAX, FLAGS, .unit = "smode" },
137  { "filename", "set motion search detailed log file name", OFFSET(filename), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS },
138  { "opencl", "ignored", OFFSET(opencl), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, .flags = FLAGS },
139  { NULL }
140 };
141 
142 AVFILTER_DEFINE_CLASS(deshake);
143 
/**
 * Ordering callback for sorting doubles: both arguments point to a double
 * and the result is FFDIFFSIGN() of the two pointed-to values.
 */
static int cmp(const void *a, const void *b)
{
    const double lhs = *(const double *)a;
    const double rhs = *(const double *)b;

    return FFDIFFSIGN(lhs, rhs);
}
148 
149 /**
150  * Cleaned mean (cuts off 20% of values to remove outliers and then averages)
151  */
152 static double clean_mean(double *values, int count)
153 {
154  double mean = 0;
155  int cut = count / 5;
156  int x;
157 
158  AV_QSORT(values, count, double, cmp);
159 
160  for (x = cut; x < count - cut; x++) {
161  mean += values[x];
162  }
163 
164  return mean / (count - cut * 2);
165 }
166 
167 /**
168  * Find the most likely shift in motion between two frames for a given
169  * macroblock. Test each block against several shifts given by the rx
170  * and ry attributes. Searches using a simple matrix of those shifts and
171  * chooses the most likely shift by the smallest difference in blocks.
172  */
173 static void find_block_motion(DeshakeContext *deshake, uint8_t *src1,
174  uint8_t *src2, int cx, int cy, int stride,
176 {
177  int x, y;
178  int diff;
179  int smallest = INT_MAX;
180  int tmp, tmp2;
181 
182  #define CMP(i, j) deshake->sad(src1 + cy * stride + cx, stride,\
183  src2 + (j) * stride + (i), stride)
184 
185  if (deshake->search == EXHAUSTIVE) {
186  // Compare every possible position - this is sloooow!
187  for (y = -deshake->ry; y <= deshake->ry; y++) {
188  for (x = -deshake->rx; x <= deshake->rx; x++) {
189  diff = CMP(cx - x, cy - y);
190  if (diff < smallest) {
191  smallest = diff;
192  mv->x = x;
193  mv->y = y;
194  }
195  }
196  }
197  } else if (deshake->search == SMART_EXHAUSTIVE) {
198  // Compare every other possible position and find the best match
199  for (y = -deshake->ry + 1; y < deshake->ry; y += 2) {
200  for (x = -deshake->rx + 1; x < deshake->rx; x += 2) {
201  diff = CMP(cx - x, cy - y);
202  if (diff < smallest) {
203  smallest = diff;
204  mv->x = x;
205  mv->y = y;
206  }
207  }
208  }
209 
210  // Hone in on the specific best match around the match we found above
211  tmp = mv->x;
212  tmp2 = mv->y;
213 
214  for (y = tmp2 - 1; y <= tmp2 + 1; y++) {
215  for (x = tmp - 1; x <= tmp + 1; x++) {
216  if (x == tmp && y == tmp2)
217  continue;
218 
219  diff = CMP(cx - x, cy - y);
220  if (diff < smallest) {
221  smallest = diff;
222  mv->x = x;
223  mv->y = y;
224  }
225  }
226  }
227  }
228 
229  if (smallest > 512) {
230  mv->x = -1;
231  mv->y = -1;
232  }
233  //av_log(NULL, AV_LOG_ERROR, "%d\n", smallest);
234  //av_log(NULL, AV_LOG_ERROR, "Final: (%d, %d) = %d x %d\n", cx, cy, mv->x, mv->y);
235 }
236 
/**
 * Find the contrast of a given block. When searching for global motion we
 * really only care about the high contrast blocks, so using this method we
 * can actually skip blocks we don't care much about.
 *
 * @param src       plane to read pixels from
 * @param x         horizontal position of the block's first pixel
 * @param y         vertical position of the block's first pixel
 * @param stride    line size of src
 * @param blocksize half of the block height; rows y .. y + 2 * blocksize
 *                  (inclusive) are scanned
 * @return difference between the brightest and the darkest pixel scanned
 */
static int block_contrast(uint8_t *src, int x, int y, int stride, int blocksize)
{
    int highest = 0;
    int lowest = 255;
    int i, j;

    // NOTE(review): the inclusive bound scans 2 * blocksize + 1 rows; kept
    // as in the original since callers size their margins around it.
    for (i = 0; i <= blocksize * 2; i++) {
        const uint8_t *row = src + (y + i) * stride + x;

        // We use a width of 16 here to match the sad function
        for (j = 0; j <= 15; j++) {
            const int value = row[j];

            // The two checks must be independent: a pixel that lowers the
            // minimum can also be the maximum seen so far (e.g. the very
            // first pixel, or monotonically decreasing data).
            if (value < lowest)
                lowest = value;
            if (value > highest)
                highest = value;
        }
    }

    return highest - lowest;
}
262 
263 /**
264  * Find the rotation for a given block.
265  */
266 static double block_angle(int x, int y, int cx, int cy, IntMotionVector *shift)
267 {
268  double a1, a2, diff;
269 
270  a1 = atan2(y - cy, x - cx);
271  a2 = atan2(y - cy + shift->y, x - cx + shift->x);
272 
273  diff = a2 - a1;
274 
275  return (diff > M_PI) ? diff - 2 * M_PI :
276  (diff < -M_PI) ? diff + 2 * M_PI :
277  diff;
278 }
279 
280 /**
281  * Find the estimated global motion for a scene given the most likely shift
282  * for each block in the frame. The global motion is estimated to be the
283  * same as the motion from most blocks in the frame, so if most blocks
284  * move one pixel to the right and two pixels down, this would yield a
285  * motion vector (1, -2).
286  */
287 static void find_motion(DeshakeContext *deshake, uint8_t *src1, uint8_t *src2,
288  int width, int height, int stride, Transform *t)
289 {
290  int x, y;
291  IntMotionVector mv = {0, 0};
292  int count_max_value = 0;
293  int contrast;
294 
295  int pos;
296  int center_x = 0, center_y = 0;
297  double p_x, p_y;
298 
299  av_fast_malloc(&deshake->angles, &deshake->angles_size, width * height / (16 * deshake->blocksize) * sizeof(*deshake->angles));
300 
301  // Reset counts to zero
302  for (x = 0; x < deshake->rx * 2 + 1; x++) {
303  for (y = 0; y < deshake->ry * 2 + 1; y++) {
304  deshake->counts[x][y] = 0;
305  }
306  }
307 
308  pos = 0;
309  // Find motion for every block and store the motion vector in the counts
310  for (y = deshake->ry; y < height - deshake->ry - (deshake->blocksize * 2); y += deshake->blocksize * 2) {
311  // We use a width of 16 here to match the sad function
312  for (x = deshake->rx; x < width - deshake->rx - 16; x += 16) {
313  // If the contrast is too low, just skip this block as it probably
314  // won't be very useful to us.
315  contrast = block_contrast(src2, x, y, stride, deshake->blocksize);
316  if (contrast > deshake->contrast) {
317  //av_log(NULL, AV_LOG_ERROR, "%d\n", contrast);
318  find_block_motion(deshake, src1, src2, x, y, stride, &mv);
319  if (mv.x != -1 && mv.y != -1) {
320  deshake->counts[mv.x + deshake->rx][mv.y + deshake->ry] += 1;
321  if (x > deshake->rx && y > deshake->ry)
322  deshake->angles[pos++] = block_angle(x, y, 0, 0, &mv);
323 
324  center_x += mv.x;
325  center_y += mv.y;
326  }
327  }
328  }
329  }
330 
331  if (pos) {
332  center_x /= pos;
333  center_y /= pos;
334  t->angle = clean_mean(deshake->angles, pos);
335  if (t->angle < 0.001)
336  t->angle = 0;
337  } else {
338  t->angle = 0;
339  }
340 
341  // Find the most common motion vector in the frame and use it as the gmv
342  for (y = deshake->ry * 2; y >= 0; y--) {
343  for (x = 0; x < deshake->rx * 2 + 1; x++) {
344  //av_log(NULL, AV_LOG_ERROR, "%5d ", deshake->counts[x][y]);
345  if (deshake->counts[x][y] > count_max_value) {
346  t->vec.x = x - deshake->rx;
347  t->vec.y = y - deshake->ry;
348  count_max_value = deshake->counts[x][y];
349  }
350  }
351  //av_log(NULL, AV_LOG_ERROR, "\n");
352  }
353 
354  p_x = (center_x - width / 2.0);
355  p_y = (center_y - height / 2.0);
356  t->vec.x += (cos(t->angle)-1)*p_x - sin(t->angle)*p_y;
357  t->vec.y += sin(t->angle)*p_x + (cos(t->angle)-1)*p_y;
358 
359  // Clamp max shift & rotation?
360  t->vec.x = av_clipf(t->vec.x, -deshake->rx * 2, deshake->rx * 2);
361  t->vec.y = av_clipf(t->vec.y, -deshake->ry * 2, deshake->ry * 2);
362  t->angle = av_clipf(t->angle, -0.1, 0.1);
363 
364  //av_log(NULL, AV_LOG_ERROR, "%d x %d\n", avg->x, avg->y);
365 }
366 
368  int width, int height, int cw, int ch,
369  const float *matrix_y, const float *matrix_uv,
371  enum FillMethod fill, AVFrame *in, AVFrame *out)
372 {
373  int i = 0, ret = 0;
374  const float *matrixs[3];
375  int plane_w[3], plane_h[3];
376  matrixs[0] = matrix_y;
377  matrixs[1] = matrixs[2] = matrix_uv;
378  plane_w[0] = width;
379  plane_w[1] = plane_w[2] = cw;
380  plane_h[0] = height;
381  plane_h[1] = plane_h[2] = ch;
382 
383  for (i = 0; i < 3; i++) {
384  // Transform the luma and chroma planes
385  ret = ff_affine_transform(in->data[i], out->data[i], in->linesize[i],
386  out->linesize[i], plane_w[i], plane_h[i],
387  matrixs[i], interpolate, fill);
388  if (ret < 0)
389  return ret;
390  }
391  return ret;
392 }
393 
395 {
396  DeshakeContext *deshake = ctx->priv;
397 
398  deshake->refcount = 20; // XXX: add to options?
399  deshake->blocksize /= 2;
400  deshake->blocksize = av_clip(deshake->blocksize, 4, 128);
401 
402  if (deshake->rx % 16) {
403  av_log(ctx, AV_LOG_ERROR, "rx must be a multiple of 16\n");
404  return AVERROR_PATCHWELCOME;
405  }
406 
407  if (deshake->filename)
408  deshake->fp = avpriv_fopen_utf8(deshake->filename, "w");
409  if (deshake->fp)
410  fwrite("Ori x, Avg x, Fin x, Ori y, Avg y, Fin y, Ori angle, Avg angle, Fin angle, Ori zoom, Avg zoom, Fin zoom\n", 1, 104, deshake->fp);
411 
412  // Quadword align left edge of box for MMX code, adjust width if necessary
413  // to keep right margin
414  if (deshake->cx > 0) {
415  deshake->cw += deshake->cx - (deshake->cx & ~15);
416  deshake->cx &= ~15;
417  }
418  deshake->transform = deshake_transform_c;
419 
420  av_log(ctx, AV_LOG_VERBOSE, "cx: %d, cy: %d, cw: %d, ch: %d, rx: %d, ry: %d, edge: %d blocksize: %d contrast: %d search: %d\n",
421  deshake->cx, deshake->cy, deshake->cw, deshake->ch,
422  deshake->rx, deshake->ry, deshake->edge, deshake->blocksize * 2, deshake->contrast, deshake->search);
423 
424  return 0;
425 }
426 
427 static const enum AVPixelFormat pix_fmts[] = {
431 };
432 
434 {
435  DeshakeContext *deshake = link->dst->priv;
436 
437  deshake->ref = NULL;
438  deshake->last.vec.x = 0;
439  deshake->last.vec.y = 0;
440  deshake->last.angle = 0;
441  deshake->last.zoom = 0;
442 
443  return 0;
444 }
445 
447 {
448  DeshakeContext *deshake = ctx->priv;
449  av_frame_free(&deshake->ref);
450  av_freep(&deshake->angles);
451  deshake->angles_size = 0;
452  if (deshake->fp)
453  fclose(deshake->fp);
454 }
455 
457 {
458  DeshakeContext *deshake = link->dst->priv;
459  AVFilterLink *outlink = link->dst->outputs[0];
460  AVFrame *out;
461  Transform t = {{0},0}, orig = {{0},0};
462  float matrix_y[9], matrix_uv[9];
463  float alpha = 2.0 / deshake->refcount;
464  char tmp[256];
465  int ret = 0;
467  const int chroma_width = AV_CEIL_RSHIFT(link->w, desc->log2_chroma_w);
468  const int chroma_height = AV_CEIL_RSHIFT(link->h, desc->log2_chroma_h);
469  int aligned;
470  float transform_zoom;
471 
472  out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
473  if (!out) {
474  av_frame_free(&in);
475  return AVERROR(ENOMEM);
476  }
478 
479  aligned = !((intptr_t)in->data[0] & 15 | in->linesize[0] & 15);
480  deshake->sad = av_pixelutils_get_sad_fn(4, 4, aligned, deshake); // 16x16, 2nd source unaligned
481  if (!deshake->sad)
482  return AVERROR(EINVAL);
483 
484  if (deshake->cx < 0 || deshake->cy < 0 || deshake->cw < 0 || deshake->ch < 0) {
485  // Find the most likely global motion for the current frame
486  find_motion(deshake, (deshake->ref == NULL) ? in->data[0] : deshake->ref->data[0], in->data[0], link->w, link->h, in->linesize[0], &t);
487  } else {
488  uint8_t *src1 = (deshake->ref == NULL) ? in->data[0] : deshake->ref->data[0];
489  uint8_t *src2 = in->data[0];
490 
491  deshake->cx = FFMIN(deshake->cx, link->w);
492  deshake->cy = FFMIN(deshake->cy, link->h);
493 
494  if ((unsigned)deshake->cx + (unsigned)deshake->cw > link->w) deshake->cw = link->w - deshake->cx;
495  if ((unsigned)deshake->cy + (unsigned)deshake->ch > link->h) deshake->ch = link->h - deshake->cy;
496 
497  // Quadword align right margin
498  deshake->cw &= ~15;
499 
500  src1 += deshake->cy * in->linesize[0] + deshake->cx;
501  src2 += deshake->cy * in->linesize[0] + deshake->cx;
502 
503  find_motion(deshake, src1, src2, deshake->cw, deshake->ch, in->linesize[0], &t);
504  }
505 
506 
507  // Copy transform so we can output it later to compare to the smoothed value
508  orig.vec.x = t.vec.x;
509  orig.vec.y = t.vec.y;
510  orig.angle = t.angle;
511  orig.zoom = t.zoom;
512 
513  // Generate a one-sided moving exponential average
514  deshake->avg.vec.x = alpha * t.vec.x + (1.0 - alpha) * deshake->avg.vec.x;
515  deshake->avg.vec.y = alpha * t.vec.y + (1.0 - alpha) * deshake->avg.vec.y;
516  deshake->avg.angle = alpha * t.angle + (1.0 - alpha) * deshake->avg.angle;
517  deshake->avg.zoom = alpha * t.zoom + (1.0 - alpha) * deshake->avg.zoom;
518 
519  // Remove the average from the current motion to detect the motion that
520  // is not on purpose, just as jitter from bumping the camera
521  t.vec.x -= deshake->avg.vec.x;
522  t.vec.y -= deshake->avg.vec.y;
523  t.angle -= deshake->avg.angle;
524  t.zoom -= deshake->avg.zoom;
525 
526  // Invert the motion to undo it
527  t.vec.x *= -1;
528  t.vec.y *= -1;
529  t.angle *= -1;
530 
531  // Write statistics to file
532  if (deshake->fp) {
533  snprintf(tmp, 256, "%f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f\n", orig.vec.x, deshake->avg.vec.x, t.vec.x, orig.vec.y, deshake->avg.vec.y, t.vec.y, orig.angle, deshake->avg.angle, t.angle, orig.zoom, deshake->avg.zoom, t.zoom);
534  fwrite(tmp, 1, strlen(tmp), deshake->fp);
535  }
536 
537  // Turn relative current frame motion into absolute by adding it to the
538  // last absolute motion
539  t.vec.x += deshake->last.vec.x;
540  t.vec.y += deshake->last.vec.y;
541  t.angle += deshake->last.angle;
542  t.zoom += deshake->last.zoom;
543 
544  // Shrink motion by 10% to keep things centered in the camera frame
545  t.vec.x *= 0.9;
546  t.vec.y *= 0.9;
547  t.angle *= 0.9;
548 
549  // Store the last absolute motion information
550  deshake->last.vec.x = t.vec.x;
551  deshake->last.vec.y = t.vec.y;
552  deshake->last.angle = t.angle;
553  deshake->last.zoom = t.zoom;
554 
555  transform_zoom = 1.0 + t.zoom / 100.0;
556 
557  // Generate a luma transformation matrix
558  ff_get_matrix(t.vec.x, t.vec.y, t.angle, transform_zoom, transform_zoom, matrix_y);
559  // Generate a chroma transformation matrix
560  ff_get_matrix(t.vec.x / (link->w / chroma_width), t.vec.y / (link->h / chroma_height), t.angle, transform_zoom, transform_zoom, matrix_uv);
561  // Transform the luma and chroma planes
562  ret = deshake->transform(link->dst, link->w, link->h, chroma_width, chroma_height,
563  matrix_y, matrix_uv, INTERPOLATE_BILINEAR, deshake->edge, in, out);
564 
565  // Cleanup the old reference frame
566  av_frame_free(&deshake->ref);
567 
568  if (ret < 0)
569  goto fail;
570 
571  // Store the current frame as the reference frame for calculating the
572  // motion of the next frame
573  deshake->ref = in;
574 
575  return ff_filter_frame(outlink, out);
576 fail:
577  av_frame_free(&out);
578  return ret;
579 }
580 
581 static const AVFilterPad deshake_inputs[] = {
582  {
583  .name = "default",
584  .type = AVMEDIA_TYPE_VIDEO,
585  .filter_frame = filter_frame,
586  .config_props = config_props,
587  },
588 };
589 
591  .name = "deshake",
592  .description = NULL_IF_CONFIG_SMALL("Stabilize shaky video."),
593  .priv_size = sizeof(DeshakeContext),
594  .init = init,
595  .uninit = uninit,
599  .priv_class = &deshake_class,
600 };
ff_get_video_buffer
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.
Definition: video.c:112
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:71
av_clip
#define av_clip
Definition: common.h:98
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
DeshakeContext::ry
int ry
Maximum vertical shift.
Definition: vf_deshake.c:96
MotionVector::y
int16_t y
Definition: agm.c:39
out
FILE * out
Definition: movenc.c:54
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1018
IntMotionVector::y
int y
Vertical shift.
Definition: vf_deshake.c:73
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2962
FILL_CLAMP
@ FILL_CLAMP
Definition: transform.h:54
src1
const pixel * src1
Definition: h264pred_template.c:421
FILTER_PIXFMTS_ARRAY
#define FILTER_PIXFMTS_ARRAY(array)
Definition: internal.h:162
mv
static const int8_t mv[256][2]
Definition: 4xm.c:80
MotionVector::x
double x
Horizontal shift.
Definition: vf_deshake.c:77
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:130
av_pixelutils_sad_fn
int(* av_pixelutils_sad_fn)(const uint8_t *src1, ptrdiff_t stride1, const uint8_t *src2, ptrdiff_t stride2)
Sum of abs(src1[x] - src2[x])
Definition: pixelutils.h:28
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:344
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:28
pixdesc.h
DeshakeContext::angles_size
unsigned angles_size
Definition: vf_deshake.c:93
deshake_options
static const AVOption deshake_options[]
Definition: vf_deshake.c:120
AVOption
AVOption.
Definition: opt.h:346
b
#define b
Definition: input.c:41
DeshakeContext::transform
int(* transform)(AVFilterContext *ctx, int width, int height, int cw, int ch, const float *matrix_y, const float *matrix_uv, enum InterpolateMethod interpolate, enum FillMethod fill, AVFrame *in, AVFrame *out)
Definition: vf_deshake.c:112
AV_LOG_VERBOSE
#define AV_LOG_VERBOSE
Detailed information.
Definition: log.h:196
AV_PIX_FMT_YUV440P
@ AV_PIX_FMT_YUV440P
planar YUV 4:4:0 (1 Cr & Cb sample per 1x2 Y samples)
Definition: pixfmt.h:106
deshake_inputs
static const AVFilterPad deshake_inputs[]
Definition: vf_deshake.c:581
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:170
InterpolateMethod
InterpolateMethod
Definition: transform.h:39
FLAGS
#define FLAGS
Definition: vf_deshake.c:118
video.h
block_contrast
static int block_contrast(uint8_t *src, int x, int y, int stride, int blocksize)
Find the contrast of a given block.
Definition: vf_deshake.c:242
AVFrame::data
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:365
DeshakeContext::blocksize
int blocksize
Size of blocks to compare.
Definition: vf_deshake.c:98
FILL_ORIGINAL
@ FILL_ORIGINAL
Definition: transform.h:53
FILL_BLANK
@ FILL_BLANK
Definition: transform.h:52
AVFILTER_DEFINE_CLASS
AVFILTER_DEFINE_CLASS(deshake)
MotionVector::x
int16_t x
Definition: agm.c:39
DeshakeContext::last
Transform last
Transform from last frame.
Definition: vf_deshake.c:102
ff_vf_deshake
const AVFilter ff_vf_deshake
Definition: vf_deshake.c:590
fail
#define fail()
Definition: checkasm.h:179
DeshakeContext::ch
int ch
Definition: vf_deshake.c:107
init
static av_cold int init(AVFilterContext *ctx)
Definition: vf_deshake.c:394
DeshakeContext::opencl
int opencl
Definition: vf_deshake.c:111
cmp
static int cmp(const void *a, const void *b)
Definition: vf_deshake.c:144
CMP
#define CMP(i, j)
AVFilterPad
A filter pad used for either input or output.
Definition: internal.h:33
FFDIFFSIGN
#define FFDIFFSIGN(x, y)
Comparator.
Definition: macros.h:45
a1
#define a1
Definition: regdef.h:47
aligned
static int aligned(int val)
Definition: dashdec.c:170
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:180
av_cold
#define av_cold
Definition: attributes.h:90
OFFSET
#define OFFSET(x)
Definition: vf_deshake.c:117
ff_video_default_filterpad
const AVFilterPad ff_video_default_filterpad[1]
An AVFilterPad array whose only entry has name "default" and is of type AVMEDIA_TYPE_VIDEO.
Definition: video.c:37
Transform::vec
MotionVector vec
Motion vector.
Definition: vf_deshake.c:82
AV_PIX_FMT_YUVJ422P
@ AV_PIX_FMT_YUVJ422P
planar YUV 4:2:2, 16bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV422P and setting col...
Definition: pixfmt.h:86
FILL_COUNT
@ FILL_COUNT
Definition: transform.h:56
width
#define width
DeshakeContext::refcount
int refcount
Number of reference frames (defines averaging window)
Definition: vf_deshake.c:103
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:58
DeshakeContext::avg
Transform avg
Definition: vf_deshake.c:105
Transform::zoom
double zoom
Zoom percentage.
Definition: vf_deshake.c:84
DeshakeContext::ref
AVFrame * ref
Previous frame.
Definition: vf_deshake.c:94
ctx
AVFormatContext * ctx
Definition: movenc.c:48
SearchMethod
SearchMethod
Definition: vf_deshake.c:65
AV_PIX_FMT_YUV420P
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:73
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: internal.h:182
file_open.h
link
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a link
Definition: filter_design.txt:23
AV_PIX_FMT_YUVJ444P
@ AV_PIX_FMT_YUVJ444P
planar YUV 4:4:4, 24bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV444P and setting col...
Definition: pixfmt.h:87
DeshakeContext::filename
char * filename
Motion search detailed log filename.
Definition: vf_deshake.c:110
AVClass
Describe the class of an AVClass context structure.
Definition: log.h:66
NULL
#define NULL
Definition: coverity.c:32
AVERROR_PATCHWELCOME
#define AVERROR_PATCHWELCOME
Not yet implemented in FFmpeg, patches welcome.
Definition: error.h:64
av_frame_copy_props
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:679
AV_PIX_FMT_YUVJ420P
@ AV_PIX_FMT_YUVJ420P
planar YUV 4:2:0, 12bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV420P and setting col...
Definition: pixfmt.h:85
SMART_EXHAUSTIVE
@ SMART_EXHAUSTIVE
Search most possible positions (faster)
Definition: vf_deshake.c:67
block_angle
static double block_angle(int x, int y, int cx, int cy, IntMotionVector *shift)
Find the rotation for a given block.
Definition: vf_deshake.c:266
av_clipf
av_clipf
Definition: af_crystalizer.c:121
DeshakeContext::cx
int cx
Definition: vf_deshake.c:108
DeshakeContext::cw
int cw
Crop motion search to this box.
Definition: vf_deshake.c:106
find_motion
static void find_motion(DeshakeContext *deshake, uint8_t *src1, uint8_t *src2, int width, int height, int stride, Transform *t)
Find the estimated global motion for a scene given the most likely shift for each block in the frame.
Definition: vf_deshake.c:287
qsort.h
clean_mean
static double clean_mean(double *values, int count)
Cleaned mean (cuts off 20% of values to remove outliers and then averages)
Definition: vf_deshake.c:152
find_block_motion
static void find_block_motion(DeshakeContext *deshake, uint8_t *src1, uint8_t *src2, int cx, int cy, int stride, IntMotionVector *mv)
Find the most likely shift in motion between two frames for a given macroblock.
Definition: vf_deshake.c:173
pix_fmts
static enum AVPixelFormat pix_fmts[]
Definition: vf_deshake.c:427
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:106
MotionVector::y
double y
Vertical shift.
Definition: vf_deshake.c:78
shift
static int shift(int a, int b)
Definition: bonk.c:262
DeshakeContext::counts
int counts[2 *MAX_R+1][2 *MAX_R+1]
Scratch buffer for motion search.
Definition: vf_deshake.c:91
EXHAUSTIVE
@ EXHAUSTIVE
Search all possible positions.
Definition: vf_deshake.c:66
FILL_MIRROR
@ FILL_MIRROR
Definition: transform.h:55
pixelutils.h
AVFrame::format
int format
format of the frame, -1 if unknown or unset Values correspond to enum AVPixelFormat for video frames,...
Definition: frame.h:431
diff
static av_always_inline int diff(const struct color_info *a, const struct color_info *b, const int trans_thresh)
Definition: vf_paletteuse.c:164
DeshakeContext::angles
double * angles
Scratch buffer for block angles.
Definition: vf_deshake.c:92
height
#define height
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
INTERPOLATE_BILINEAR
@ INTERPOLATE_BILINEAR
Definition: transform.h:41
SEARCH_COUNT
@ SEARCH_COUNT
Definition: vf_deshake.c:68
MotionVector
Definition: agm.c:38
interpolate
static void interpolate(float *out, float v1, float v2, int size)
Definition: twinvq.c:84
M_PI
#define M_PI
Definition: mathematics.h:67
MAX_R
#define MAX_R
Definition: vf_deshake.c:87
internal.h
filter_frame
static int filter_frame(AVFilterLink *link, AVFrame *in)
Definition: vf_deshake.c:456
ff_get_matrix
void ff_get_matrix(float x_shift, float y_shift, float angle, float scale_x, float scale_y, float *matrix)
Get an affine transformation matrix from given translation, rotation, and zoom factors.
Definition: transform.c:106
config_props
static int config_props(AVFilterLink *link)
Definition: vf_deshake.c:433
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:255
AV_QSORT
#define AV_QSORT(p, num, type, cmp)
Quicksort. This sort is fast and fully in-place, but not stable, and it is possible to construct input that requires O(n^2) time, although this is very unlikely to happen with non-constructed input.
Definition: qsort.h:33
src2
const pixel * src2
Definition: h264pred_template.c:422
a2
#define a2
Definition: regdef.h:48
common.h
DeshakeContext::contrast
int contrast
Contrast threshold.
Definition: vf_deshake.c:99
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
AV_PIX_FMT_YUVJ440P
@ AV_PIX_FMT_YUVJ440P
planar YUV 4:4:0 full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV440P and setting color_range
Definition: pixfmt.h:107
Transform::angle
double angle
Angle of rotation.
Definition: vf_deshake.c:83
DeshakeContext::search
int search
Motion search method.
Definition: vf_deshake.c:100
AVFilterPad::name
const char * name
Pad name.
Definition: internal.h:39
avpriv_fopen_utf8
FILE * avpriv_fopen_utf8(const char *path, const char *mode)
Open a file using a UTF-8 filename.
Definition: file_open.c:159
stride
#define stride
Definition: h264pred_template.c:537
AVFilter
Filter definition.
Definition: avfilter.h:166
ret
ret
Definition: filter_design.txt:187
search
static float search(FOCContext *foc, int pass, int maxpass, int xmin, int xmax, int ymin, int ymax, int *best_x, int *best_y, float best_score)
Definition: vf_find_rect.c:147
pos
unsigned int pos
Definition: spdifenc.c:413
transform.h
deshake_transform_c
static int deshake_transform_c(AVFilterContext *ctx, int width, int height, int cw, int ch, const float *matrix_y, const float *matrix_uv, enum InterpolateMethod interpolate, enum FillMethod fill, AVFrame *in, AVFrame *out)
Definition: vf_deshake.c:367
uninit
static av_cold void uninit(AVFilterContext *ctx)
Definition: vf_deshake.c:446
ff_affine_transform
int ff_affine_transform(const uint8_t *src, uint8_t *dst, int src_stride, int dst_stride, int width, int height, const float *matrix, enum InterpolateMethod interpolate, enum FillMethod fill)
Do an affine transformation with the given interpolation method.
Definition: transform.c:125
DeshakeContext::edge
int edge
Edge fill method.
Definition: vf_deshake.c:97
AV_PIX_FMT_NONE
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:72
Transform
Definition: vf_deshake.c:81
AV_OPT_TYPE_INT
@ AV_OPT_TYPE_INT
Definition: opt.h:235
avfilter.h
av_pixelutils_get_sad_fn
av_pixelutils_sad_fn av_pixelutils_get_sad_fn(int w_bits, int h_bits, int aligned, void *log_ctx)
Get a potentially optimized pointer to a Sum-of-absolute-differences function (see the av_pixelutils_sad_fn prototype).
Definition: pixelutils.c:72
values
These buffered frames must be flushed immediately if a new input produces new output. The filter must not call request_frame to get more; it must just process the frame or queue it. The task of requesting more frames is left to the filter's request_frame method or the application. If a filter has several inputs, the filter must be ready for frames arriving randomly on any input; any filter with several inputs will most likely require some kind of queuing mechanism. It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced. request_frame: For filters that do not use the activate callback, this method is called when a frame is wanted on an output. For a source, it should directly call filter_frame on the corresponding output. For a filter, if there are queued frames already ready, one of these frames should be pushed. If not, the filter should request a frame on one of its inputs, repeatedly until at least one frame has been pushed. Return values
Definition: filter_design.txt:263
mean
static float mean(const float *input, int size)
Definition: vf_nnedi.c:862
DeshakeContext::fp
FILE * fp
Definition: vf_deshake.c:104
AV_PIX_FMT_YUV444P
@ AV_PIX_FMT_YUV444P
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:78
AVFilterContext
An instance of a filter.
Definition: avfilter.h:407
desc
const char * desc
Definition: libsvtav1.c:75
AVMEDIA_TYPE_VIDEO
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:201
AV_PIX_FMT_YUV422P
@ AV_PIX_FMT_YUV422P
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:77
mem.h
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes of an image.
Definition: pixdesc.h:69
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
DeshakeContext::rx
int rx
Maximum horizontal shift.
Definition: vf_deshake.c:95
AV_OPT_TYPE_BOOL
@ AV_OPT_TYPE_BOOL
Definition: opt.h:251
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: internal.h:183
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
av_fast_malloc
void av_fast_malloc(void *ptr, unsigned int *size, size_t min_size)
Allocate a buffer, reusing the given one if large enough.
Definition: mem.c:555
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
AV_PIX_FMT_YUV411P
@ AV_PIX_FMT_YUV411P
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
Definition: pixfmt.h:80
FillMethod
FillMethod
Definition: transform.h:51
DeshakeContext
Definition: vf_deshake.c:89
AVFrame::linesize
int linesize[AV_NUM_DATA_POINTERS]
For video, a positive or negative value that typically indicates the size in bytes of each picture line.
Definition: frame.h:389
AV_PIX_FMT_YUV410P
@ AV_PIX_FMT_YUV410P
planar YUV 4:1:0, 9bpp, (1 Cr & Cb sample per 4x4 Y samples)
Definition: pixfmt.h:79
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
AV_OPT_TYPE_STRING
@ AV_OPT_TYPE_STRING
Definition: opt.h:239
DeshakeContext::cy
int cy
Definition: vf_deshake.c:109
int
int
Definition: ffmpeg_filter.c:409
AV_OPT_TYPE_CONST
@ AV_OPT_TYPE_CONST
Definition: opt.h:244
snprintf
#define snprintf
Definition: snprintf.h:34
IntMotionVector::x
int x
Horizontal shift.
Definition: vf_deshake.c:72
IntMotionVector
Definition: vf_deshake.c:71
DeshakeContext::sad
av_pixelutils_sad_fn sad
Sum of the absolute difference function.
Definition: vf_deshake.c:101