FFmpeg
vp8dsp.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016 Martin Storsjo
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along
17  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19  */
20 
21 #include <string.h>
22 
23 #include "libavcodec/avcodec.h"
24 #include "libavcodec/vp8dsp.h"
25 
26 #include "libavutil/common.h"
27 #include "libavutil/intreadwrite.h"
28 
29 #include "checkasm.h"
30 
#define PIXEL_STRIDE 16

/*
 * Fill a 4x4 patch of (src) and of (dst) with random bytes and store the
 * per-pixel difference src - dst into (coef)[0..15] in row-major order.
 * Consecutive rows of src/dst are (stride) bytes apart. Each row is written
 * with a single AV_WN32A store (presumably the aligned 32-bit write from
 * intreadwrite.h), so every row start must be suitably aligned.
 * For each row, src is randomized before dst.
 */
#define randomize_buffers(src, dst, stride, coef)                             \
    do {                                                                      \
        int row_, col_;                                                       \
        for (row_ = 0; row_ < 4; row_++) {                                    \
            AV_WN32A((src) + row_ * (stride), rnd());                         \
            AV_WN32A((dst) + row_ * (stride), rnd());                         \
            for (col_ = 0; col_ < 4; col_++)                                  \
                (coef)[row_ * 4 + col_] = (src)[row_ * (stride) + col_] -     \
                                          (dst)[row_ * (stride) + col_];      \
        }                                                                     \
    } while (0)
44 
/*
 * In-place 4x4 DCT-style transform of a row-major block of 16 coefficients.
 *
 * A horizontal pass over each row is followed by a vertical pass over each
 * column. The tests in this file use it to turn random pixel differences
 * into coefficient input for the inverse transforms being checked.
 *
 * coef: 16 values, read and overwritten.
 */
static void dct4x4(int16_t *coef)
{
    int16_t *p;
    int n;

    /* Horizontal pass: p walks over the start of each row. */
    for (n = 0, p = coef; n < 4; n++, p += 4) {
        const int sum_outer  = (p[0] + p[3]) * 8;
        const int sum_inner  = (p[1] + p[2]) * 8;
        const int diff_inner = (p[1] - p[2]) * 8;
        const int diff_outer = (p[0] - p[3]) * 8;

        p[0] = sum_outer + sum_inner;
        p[1] = (diff_inner * 2217 + diff_outer * 5352 + 14500) >> 12;
        p[2] = sum_outer - sum_inner;
        p[3] = (diff_outer * 2217 - diff_inner * 5352 + 7500) >> 12;
    }

    /* Vertical pass: p walks over the top of each column (elements 4 apart). */
    for (n = 0, p = coef; n < 4; n++, p++) {
        const int sum_outer  = p[0] + p[12];
        const int sum_inner  = p[4] + p[8];
        const int diff_inner = p[4] - p[8];
        const int diff_outer = p[0] - p[12];

        p[0]  = (sum_outer + sum_inner + 7) >> 4;
        /* The second output gets an extra +1 whenever diff_outer is non-zero. */
        p[4]  = ((diff_inner * 2217 + diff_outer * 5352 + 12000) >> 16) +
                (diff_outer != 0);
        p[8]  = (sum_outer - sum_inner + 7) >> 4;
        p[12] = (diff_outer * 2217 - diff_inner * 5352 + 51000) >> 16;
    }
}
69 
/*
 * One 4-point lifting step of the transform used by wht4x4(), applied in
 * place to p[0], p[step], p[2 * step] and p[3 * step]. Every result is
 * multiplied by scale before being stored.
 */
static void wht4_lift(int16_t *p, int step, int scale)
{
    int sum01  = p[0] + p[step];
    int diff32 = p[3 * step] - p[2 * step];
    const int half = (sum01 - diff32) >> 1;
    const int out3 = half - p[step];
    const int out1 = half - p[2 * step];

    sum01  -= out1;
    diff32 += out3;

    p[0]        = sum01  * scale;
    p[step]     = out1   * scale;
    p[2 * step] = diff32 * scale;
    p[3 * step] = out3   * scale;
}

/*
 * In-place 4x4 Walsh-Hadamard-style transform of a row-major block of 16
 * values: first down every column, then along every row, where the row
 * pass doubles its outputs. check_luma_dc_wht() uses it to build the DC
 * input for the luma DC functions under test.
 *
 * coef: 16 values, read and overwritten.
 */
static void wht4x4(int16_t *coef)
{
    int n;

    /* Column pass: elements of one column are 4 apart. */
    for (n = 0; n < 4; n++)
        wht4_lift(coef + n, 4, 1);

    /* Row pass: elements of one row are adjacent; outputs are doubled. */
    for (n = 0; n < 4; n++)
        wht4_lift(coef + 4 * n, 1, 2);
}
110 
111 static void check_idct(void)
112 {
113  LOCAL_ALIGNED_16(uint8_t, src, [4 * 4]);
114  LOCAL_ALIGNED_16(uint8_t, dst, [4 * 4]);
115  LOCAL_ALIGNED_16(uint8_t, dst0, [4 * 4]);
116  LOCAL_ALIGNED_16(uint8_t, dst1, [4 * 4]);
117  LOCAL_ALIGNED_16(int16_t, coef, [4 * 4]);
118  LOCAL_ALIGNED_16(int16_t, subcoef0, [4 * 4]);
119  LOCAL_ALIGNED_16(int16_t, subcoef1, [4 * 4]);
120  VP8DSPContext d;
121  int dc;
122  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *block, ptrdiff_t stride);
123 
124  ff_vp8dsp_init(&d);
125  randomize_buffers(src, dst, 4, coef);
126 
127  dct4x4(coef);
128 
129  for (dc = 0; dc <= 1; dc++) {
130  void (*idct)(uint8_t *, int16_t *, ptrdiff_t) = dc ? d.vp8_idct_dc_add : d.vp8_idct_add;
131 
132  if (check_func(idct, "vp8_idct_%sadd", dc ? "dc_" : "")) {
133  if (dc) {
134  memset(subcoef0, 0, 4 * 4 * sizeof(int16_t));
135  subcoef0[0] = coef[0];
136  } else {
137  memcpy(subcoef0, coef, 4 * 4 * sizeof(int16_t));
138  }
139  memcpy(dst0, dst, 4 * 4);
140  memcpy(dst1, dst, 4 * 4);
141  memcpy(subcoef1, subcoef0, 4 * 4 * sizeof(int16_t));
142  // Note, this uses a pixel stride of 4, even though the real decoder uses a stride as a
143  // multiple of 16. If optimizations want to take advantage of that, this test needs to be
144  // updated to make it more like the h264dsp tests.
145  call_ref(dst0, subcoef0, 4);
146  call_new(dst1, subcoef1, 4);
147  if (memcmp(dst0, dst1, 4 * 4) ||
148  memcmp(subcoef0, subcoef1, 4 * 4 * sizeof(int16_t)))
149  fail();
150 
151  bench_new(dst1, subcoef1, 4);
152  }
153  }
154 }
155 
156 static void check_idct_dc4(void)
157 {
158  LOCAL_ALIGNED_16(uint8_t, src, [4 * 4 * 4]);
159  LOCAL_ALIGNED_16(uint8_t, dst, [4 * 4 * 4]);
160  LOCAL_ALIGNED_16(uint8_t, dst0, [4 * 4 * 4]);
161  LOCAL_ALIGNED_16(uint8_t, dst1, [4 * 4 * 4]);
162  LOCAL_ALIGNED_16(int16_t, coef, [4], [4 * 4]);
163  LOCAL_ALIGNED_16(int16_t, subcoef0, [4], [4 * 4]);
164  LOCAL_ALIGNED_16(int16_t, subcoef1, [4], [4 * 4]);
165  VP8DSPContext d;
166  int i, chroma;
167  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
168 
169  ff_vp8dsp_init(&d);
170 
171  for (chroma = 0; chroma <= 1; chroma++) {
172  void (*idct4dc)(uint8_t *, int16_t[4][16], ptrdiff_t) = chroma ? d.vp8_idct_dc_add4uv : d.vp8_idct_dc_add4y;
173  if (check_func(idct4dc, "vp8_idct_dc_add4%s", chroma ? "uv" : "y")) {
174  ptrdiff_t stride = chroma ? 8 : 16;
175  int w = chroma ? 2 : 4;
176  for (i = 0; i < 4; i++) {
177  int blockx = 4 * (i % w);
178  int blocky = 4 * (i / w);
179  randomize_buffers(src + stride * blocky + blockx, dst + stride * blocky + blockx, stride, coef[i]);
180  dct4x4(coef[i]);
181  memset(&coef[i][1], 0, 15 * sizeof(int16_t));
182  }
183 
184  memcpy(dst0, dst, 4 * 4 * 4);
185  memcpy(dst1, dst, 4 * 4 * 4);
186  memcpy(subcoef0, coef, 4 * 4 * 4 * sizeof(int16_t));
187  memcpy(subcoef1, coef, 4 * 4 * 4 * sizeof(int16_t));
188  call_ref(dst0, subcoef0, stride);
189  call_new(dst1, subcoef1, stride);
190  if (memcmp(dst0, dst1, 4 * 4 * 4) ||
191  memcmp(subcoef0, subcoef1, 4 * 4 * 4 * sizeof(int16_t)))
192  fail();
193  bench_new(dst1, subcoef1, stride);
194  }
195  }
196 
197 }
198 
199 static void check_luma_dc_wht(void)
200 {
201  LOCAL_ALIGNED_16(int16_t, dc, [4 * 4]);
202  LOCAL_ALIGNED_16(int16_t, dc0, [4 * 4]);
203  LOCAL_ALIGNED_16(int16_t, dc1, [4 * 4]);
204  int16_t block[4][4][16];
205  LOCAL_ALIGNED_16(int16_t, block0, [4], [4][16]);
206  LOCAL_ALIGNED_16(int16_t, block1, [4], [4][16]);
207  VP8DSPContext d;
208  int dc_only;
209  int blockx, blocky;
210  declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t block[4][4][16], int16_t dc[16]);
211 
212  ff_vp8dsp_init(&d);
213 
214  for (blocky = 0; blocky < 4; blocky++) {
215  for (blockx = 0; blockx < 4; blockx++) {
216  uint8_t src[16], dst[16];
217  randomize_buffers(src, dst, 4, block[blocky][blockx]);
218 
219  dct4x4(block[blocky][blockx]);
220  dc[blocky * 4 + blockx] = block[blocky][blockx][0];
221  block[blocky][blockx][0] = rnd();
222  }
223  }
224  wht4x4(dc);
225 
226  for (dc_only = 0; dc_only <= 1; dc_only++) {
227  void (*idct)(int16_t [4][4][16], int16_t [16]) = dc_only ? d.vp8_luma_dc_wht_dc : d.vp8_luma_dc_wht;
228 
229  if (check_func(idct, "vp8_luma_dc_wht%s", dc_only ? "_dc" : "")) {
230  if (dc_only) {
231  memset(dc0, 0, 16 * sizeof(int16_t));
232  dc0[0] = dc[0];
233  } else {
234  memcpy(dc0, dc, 16 * sizeof(int16_t));
235  }
236  memcpy(dc1, dc0, 16 * sizeof(int16_t));
237  memcpy(block0, block, 4 * 4 * 16 * sizeof(int16_t));
238  memcpy(block1, block, 4 * 4 * 16 * sizeof(int16_t));
239  call_ref(block0, dc0);
240  call_new(block1, dc1);
241  if (memcmp(block0, block1, 4 * 4 * 16 * sizeof(int16_t)) ||
242  memcmp(dc0, dc1, 16 * sizeof(int16_t)))
243  fail();
244  bench_new(block1, dc1);
245  }
246  }
247 }
248 
249 #define SRC_BUF_STRIDE 32
250 #define SRC_BUF_SIZE (((size << (size < 16)) + 5) * SRC_BUF_STRIDE)
251 // The mc subpixel interpolation filter needs the 2 previous pixels in either
252 // direction, the +1 is to make sure the actual load addresses always are
253 // unaligned.
254 #define src (buf + 2 * SRC_BUF_STRIDE + 2 + 1)
255 
256 #undef randomize_buffers
257 #define randomize_buffers() \
258  do { \
259  int k; \
260  for (k = 0; k < SRC_BUF_SIZE; k += 4) { \
261  AV_WN32A(buf + k, rnd()); \
262  } \
263  } while (0)
264 
265 static void check_mc(void)
266 {
267  LOCAL_ALIGNED_16(uint8_t, buf, [32 * 32]);
268  LOCAL_ALIGNED_16(uint8_t, dst0, [16 * 16]);
269  LOCAL_ALIGNED_16(uint8_t, dst1, [16 * 16]);
270  VP8DSPContext d;
271  int type, k, dx, dy;
272  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, uint8_t *, ptrdiff_t, int, int, int);
273 
274  ff_vp78dsp_init(&d);
275 
276  for (type = 0; type < 2; type++) {
278  for (k = 1; k < 8; k++) {
279  int hsize = k / 3;
280  int size = 16 >> hsize;
281  int height = (size << 1) >> (k % 3);
282  for (dy = 0; dy < 3; dy++) {
283  for (dx = 0; dx < 3; dx++) {
284  char str[100];
285  if (dx || dy) {
286  if (type == 0) {
287  static const char *dx_names[] = { "", "h4", "h6" };
288  static const char *dy_names[] = { "", "v4", "v6" };
289  snprintf(str, sizeof(str), "epel%d_%s%s", size, dx_names[dx], dy_names[dy]);
290  } else {
291  snprintf(str, sizeof(str), "bilin%d_%s%s", size, dx ? "h" : "", dy ? "v" : "");
292  }
293  } else {
294  snprintf(str, sizeof(str), "pixels%d", size);
295  }
296  if (check_func(tab[hsize][dy][dx], "vp8_put_%s", str)) {
297  int mx, my;
298  int i;
299  if (type == 0) {
300  mx = dx == 2 ? 2 + 2 * (rnd() % 3) : dx == 1 ? 1 + 2 * (rnd() % 4) : 0;
301  my = dy == 2 ? 2 + 2 * (rnd() % 3) : dy == 1 ? 1 + 2 * (rnd() % 4) : 0;
302  } else {
303  mx = dx ? 1 + (rnd() % 7) : 0;
304  my = dy ? 1 + (rnd() % 7) : 0;
305  }
307  for (i = -2; i <= 3; i++) {
308  int val = (i == -1 || i == 2) ? 0 : 0xff;
309  // Set pixels in the first row and column to the maximum pattern,
310  // to test for potential overflows in the filter.
311  src[i ] = val;
312  src[i * SRC_BUF_STRIDE] = val;
313  }
314  call_ref(dst0, size, src, SRC_BUF_STRIDE, height, mx, my);
315  call_new(dst1, size, src, SRC_BUF_STRIDE, height, mx, my);
316  if (memcmp(dst0, dst1, size * height))
317  fail();
318  bench_new(dst1, size, src, SRC_BUF_STRIDE, height, mx, my);
319  }
320  }
321  }
322  }
323  }
324 }
325 
326 #undef randomize_buffers
327 
// Store c, clipped to 0..255, at position a along the edge and b across it.
// Relies on `buf` and `jstride` being in scope.
#define setpx(a, b, c) buf[(a) + (b) * jstride] = av_clip_uint8(c)
// Set the pixel to c +/- [0,d]
#define setdx(a, b, c, d) setpx(a, b, c - (d) + (rnd() % ((d) * 2 + 1)))
// Set the pixel to c +/- [d,d+e] (making sure it won't be clipped)
#define setdx2(a, b, o, c, d, e) setpx(a, b, o = c + ((d) + (rnd() % (e))) * (c >= 128 ? -1 : 1))

/*
 * Write loop-filter test data around an edge located at buf.
 *
 * Starting lineoff lines along the edge, every second line of an 8-line run
 * (lines 0, 2, 4 and 6) gets eight pixels: four on the q side (offsets 0..3
 * across the edge) and four on the p side (offsets -1..-4), each derived from
 * its neighbour with a bounded random delta.
 *
 * lineoff:    first line along the edge to write
 * str:        buffer stride
 * dir:        non-zero: lines are 1 byte apart and the edge is crossed in
 *             steps of str; zero: the other way round
 * flim_E, flim_I, hev_thresh: limits that bound the random deltas
 * buf:        pointer to the first q-side pixel of line 0
 * force_hev:  > 0 gives every written line a large p1/q1 step, < 0 gives
 *             none of them one, 0 gives the mixed pattern described below
 */
static void randomize_loopfilter_buffers(int lineoff, int str,
                                         int dir, int flim_E, int flim_I,
                                         int hev_thresh, uint8_t *buf,
                                         int force_hev)
{
    const uint32_t mask = 0xff;
    const int istride = dir ? 1 : str;      /* step from one line to the next */
    const int jstride = dir ? str : 1;      /* step across the edge (used by setpx) */
    const int off     = lineoff * istride;
    int i;

    for (i = 0; i < 8; i += 2) {
        // Row 0 will trigger hev for q0/q1, row 2 will trigger hev for p0/p1,
        // rows 4 and 6 will not trigger hev.
        // force_hev 1 will make sure all rows trigger hev, while force_hev -1
        // makes none of them trigger it.
        const int idx   = off + i * istride;
        const int hev_q = force_hev > 0 || (force_hev >= 0 && i == 0);
        const int hev_p = force_hev > 0 || (force_hev >= 0 && i == 2);
        int p2, p1, p0, q0, q1, q2;

        /* q side: q0 is fully random, the rest follows from it. */
        setpx(idx, 0, q0 = rnd() & mask);
        if (hev_q) {
            setdx2(idx, 1, q1, q0, hev_thresh + 1, flim_I - hev_thresh - 1);
        } else {
            setdx(idx, 1, q1 = q0, hev_thresh);
        }
        setdx(idx, 2, q2 = q1, flim_I);
        setdx(idx, 3, q2, flim_I);

        /* p side: p0 stays within flim_E >> 2 of q0. */
        setdx(idx, -1, p0 = q0, flim_E >> 2);
        if (hev_p) {
            setdx2(idx, -2, p1, p0, hev_thresh + 1, flim_I - hev_thresh - 1);
        } else {
            setdx(idx, -2, p1 = p0, hev_thresh);
        }
        setdx(idx, -3, p2 = p1, flim_I);
        setdx(idx, -4, p2, flim_I);
    }
}
366 
// Fill a w x h rectangle starting at buf with random pixel values; successive
// rows of the rectangle are stride bytes apart.
static void fill_loopfilter_buffers(uint8_t *buf, ptrdiff_t stride, int w, int h)
{
    int row;

    for (row = 0; row < h; row++) {
        uint8_t *line = buf + row * stride;
        int col;

        for (col = 0; col < w; col++)
            line[col] = rnd() & 0xff;
    }
}
375 
376 #define randomize_buffers(buf, lineoff, str, force_hev) \
377  randomize_loopfilter_buffers(lineoff, str, dir, flim_E, flim_I, hev_thresh, buf, force_hev)
378 
379 static void check_loopfilter_16y(void)
380 {
381  LOCAL_ALIGNED_16(uint8_t, base0, [32 + 16 * 16]);
382  LOCAL_ALIGNED_16(uint8_t, base1, [32 + 16 * 16]);
383  VP8DSPContext d;
384  int dir, edge, force_hev;
385  int flim_E = 20, flim_I = 10, hev_thresh = 7;
386  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int, int, int);
387 
388  ff_vp8dsp_init(&d);
389 
390  for (dir = 0; dir < 2; dir++) {
391  int midoff = dir ? 4 * 16 : 4;
392  int midoff_aligned = dir ? 4 * 16 : 16;
393  uint8_t *buf0 = base0 + midoff_aligned;
394  uint8_t *buf1 = base1 + midoff_aligned;
395  for (edge = 0; edge < 2; edge++) {
396  void (*func)(uint8_t *, ptrdiff_t, int, int, int) = NULL;
397  switch (dir << 1 | edge) {
398  case (0 << 1) | 0: func = d.vp8_h_loop_filter16y; break;
399  case (1 << 1) | 0: func = d.vp8_v_loop_filter16y; break;
400  case (0 << 1) | 1: func = d.vp8_h_loop_filter16y_inner; break;
401  case (1 << 1) | 1: func = d.vp8_v_loop_filter16y_inner; break;
402  }
403  if (check_func(func, "vp8_loop_filter16y%s_%s", edge ? "_inner" : "", dir ? "v" : "h")) {
404  for (force_hev = -1; force_hev <= 1; force_hev++) {
405  fill_loopfilter_buffers(buf0 - midoff, 16, 16, 16);
406  randomize_buffers(buf0, 0, 16, force_hev);
407  randomize_buffers(buf0, 8, 16, force_hev);
408  memcpy(buf1 - midoff, buf0 - midoff, 16 * 16);
409  call_ref(buf0, 16, flim_E, flim_I, hev_thresh);
410  call_new(buf1, 16, flim_E, flim_I, hev_thresh);
411  if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 16))
412  fail();
413  }
414  fill_loopfilter_buffers(buf0 - midoff, 16, 16, 16);
415  randomize_buffers(buf0, 0, 16, 0);
416  randomize_buffers(buf0, 8, 16, 0);
417  bench_new(buf0, 16, flim_E, flim_I, hev_thresh);
418  }
419  }
420  }
421 }
422 
423 static void check_loopfilter_8uv(void)
424 {
425  LOCAL_ALIGNED_16(uint8_t, base0u, [32 + 16 * 16]);
426  LOCAL_ALIGNED_16(uint8_t, base0v, [32 + 16 * 16]);
427  LOCAL_ALIGNED_16(uint8_t, base1u, [32 + 16 * 16]);
428  LOCAL_ALIGNED_16(uint8_t, base1v, [32 + 16 * 16]);
429  VP8DSPContext d;
430  int dir, edge, force_hev;
431  int flim_E = 20, flim_I = 10, hev_thresh = 7;
432  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, uint8_t *, ptrdiff_t, int, int, int);
433 
434  ff_vp8dsp_init(&d);
435 
436  for (dir = 0; dir < 2; dir++) {
437  int midoff = dir ? 4 * 16 : 4;
438  int midoff_aligned = dir ? 4 * 16 : 16;
439  uint8_t *buf0u = base0u + midoff_aligned;
440  uint8_t *buf0v = base0v + midoff_aligned;
441  uint8_t *buf1u = base1u + midoff_aligned;
442  uint8_t *buf1v = base1v + midoff_aligned;
443  for (edge = 0; edge < 2; edge++) {
444  void (*func)(uint8_t *, uint8_t *, ptrdiff_t, int, int, int) = NULL;
445  switch (dir << 1 | edge) {
446  case (0 << 1) | 0: func = d.vp8_h_loop_filter8uv; break;
447  case (1 << 1) | 0: func = d.vp8_v_loop_filter8uv; break;
448  case (0 << 1) | 1: func = d.vp8_h_loop_filter8uv_inner; break;
449  case (1 << 1) | 1: func = d.vp8_v_loop_filter8uv_inner; break;
450  }
451  if (check_func(func, "vp8_loop_filter8uv%s_%s", edge ? "_inner" : "", dir ? "v" : "h")) {
452  for (force_hev = -1; force_hev <= 1; force_hev++) {
453  fill_loopfilter_buffers(buf0u - midoff, 16, 16, 16);
454  fill_loopfilter_buffers(buf0v - midoff, 16, 16, 16);
455  randomize_buffers(buf0u, 0, 16, force_hev);
456  randomize_buffers(buf0v, 0, 16, force_hev);
457  memcpy(buf1u - midoff, buf0u - midoff, 16 * 16);
458  memcpy(buf1v - midoff, buf0v - midoff, 16 * 16);
459 
460  call_ref(buf0u, buf0v, 16, flim_E, flim_I, hev_thresh);
461  call_new(buf1u, buf1v, 16, flim_E, flim_I, hev_thresh);
462  if (memcmp(buf0u - midoff, buf1u - midoff, 16 * 16) ||
463  memcmp(buf0v - midoff, buf1v - midoff, 16 * 16))
464  fail();
465  }
466  fill_loopfilter_buffers(buf0u - midoff, 16, 16, 16);
467  fill_loopfilter_buffers(buf0v - midoff, 16, 16, 16);
468  randomize_buffers(buf0u, 0, 16, 0);
469  randomize_buffers(buf0v, 0, 16, 0);
470  bench_new(buf0u, buf0v, 16, flim_E, flim_I, hev_thresh);
471  }
472  }
473  }
474 }
475 
476 static void check_loopfilter_simple(void)
477 {
478  LOCAL_ALIGNED_16(uint8_t, base0, [32 + 16 * 16]);
479  LOCAL_ALIGNED_16(uint8_t, base1, [32 + 16 * 16]);
480  VP8DSPContext d;
481  int dir;
482  int flim_E = 20, flim_I = 30, hev_thresh = 0;
483  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int);
484 
485  ff_vp8dsp_init(&d);
486 
487  for (dir = 0; dir < 2; dir++) {
488  int midoff = dir ? 4 * 16 : 4;
489  int midoff_aligned = dir ? 4 * 16 : 16;
490  uint8_t *buf0 = base0 + midoff_aligned;
491  uint8_t *buf1 = base1 + midoff_aligned;
492  void (*func)(uint8_t *, ptrdiff_t, int) = dir ? d.vp8_v_loop_filter_simple : d.vp8_h_loop_filter_simple;
493  if (check_func(func, "vp8_loop_filter_simple_%s", dir ? "v" : "h")) {
494  fill_loopfilter_buffers(buf0 - midoff, 16, 16, 16);
495  randomize_buffers(buf0, 0, 16, -1);
496  randomize_buffers(buf0, 8, 16, -1);
497  memcpy(buf1 - midoff, buf0 - midoff, 16 * 16);
498  call_ref(buf0, 16, flim_E);
499  call_new(buf1, 16, flim_E);
500  if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 16))
501  fail();
502  bench_new(buf0, 16, flim_E);
503  }
504  }
505 }
506 
/*
 * Entry point of the VP8 DSP checks: runs every test in this file and
 * reports the results in three groups ("idct", "mc" and "loopfilter").
 */
void checkasm_check_vp8dsp(void)
{
    check_idct();
    check_idct_dc4();
    check_luma_dc_wht();
    report("idct");
    check_mc();
    report("mc");
    check_loopfilter_16y();
    check_loopfilter_8uv();
    check_loopfilter_simple();
    report("loopfilter");
}
func
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:67
VP8DSPContext::vp8_h_loop_filter8uv
void(* vp8_h_loop_filter8uv)(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
Definition: vp8dsp.h:54
declare_func_emms
#define declare_func_emms(cpu_flags, ret,...)
Definition: checkasm.h:117
stride
int stride
Definition: mace.c:144
VP8DSPContext::vp8_h_loop_filter8uv_inner
void(* vp8_h_loop_filter8uv_inner)(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
Definition: vp8dsp.h:65
q1
static const uint8_t q1[256]
Definition: twofish.c:96
VP8DSPContext::vp8_v_loop_filter8uv
void(* vp8_v_loop_filter8uv)(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
Definition: vp8dsp.h:52
check_loopfilter_simple
static void check_loopfilter_simple(void)
Definition: vp8dsp.c:476
w
uint8_t w
Definition: llviddspenc.c:38
check_func
#define check_func(func,...)
Definition: checkasm.h:111
VP8DSPContext::vp8_v_loop_filter16y
void(* vp8_v_loop_filter16y)(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
Definition: vp8dsp.h:48
chroma
static av_always_inline void chroma(WaveformContext *s, AVFrame *in, AVFrame *out, int component, int intensity, int offset_y, int offset_x, int column, int mirror, int jobnr, int nb_jobs)
Definition: vf_waveform.c:1511
wht4x4
static void wht4x4(int16_t *coef)
Definition: vp8dsp.c:70
c1
static const uint64_t c1
Definition: murmur3.c:49
call_ref
#define call_ref(...)
Definition: checkasm.h:126
randomize_buffers
#define randomize_buffers(src, dst, stride, coef)
Definition: vp8dsp.c:376
fail
#define fail()
Definition: checkasm.h:120
tab
static const struct twinvq_data tab
Definition: twinvq_data.h:11135
checkasm.h
type
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf type
Definition: writing_filters.txt:86
src
#define src
Definition: vp8dsp.c:254
idct
static void idct(int16_t block[64])
Definition: 4xm.c:163
a1
#define a1
Definition: regdef.h:47
rnd
#define rnd()
Definition: checkasm.h:104
buf
void * buf
Definition: avisynth_c.h:766
ff_vp8dsp_init
void ff_vp8dsp_init(VP8DSPContext *c)
VP8DSPContext::vp8_v_loop_filter16y_inner
void(* vp8_v_loop_filter16y_inner)(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
Definition: vp8dsp.h:58
vp8dsp.h
mask
static const uint16_t mask[17]
Definition: lzw.c:38
intreadwrite.h
randomize_loopfilter_buffers
static void randomize_loopfilter_buffers(int lineoff, int str, int dir, int flim_E, int flim_I, int hev_thresh, uint8_t *buf, int force_hev)
Definition: vp8dsp.c:334
VP8DSPContext::vp8_h_loop_filter_simple
void(* vp8_h_loop_filter_simple)(uint8_t *dst, ptrdiff_t stride, int flim)
Definition: vp8dsp.h:70
q0
static const uint8_t q0[256]
Definition: twofish.c:77
VP8DSPContext::vp8_v_loop_filter_simple
void(* vp8_v_loop_filter_simple)(uint8_t *dst, ptrdiff_t stride, int flim)
Definition: vp8dsp.h:69
check_idct_dc4
static void check_idct_dc4(void)
Definition: vp8dsp.c:156
if
if(ret)
Definition: filter_design.txt:179
checkasm_check_vp8dsp
void checkasm_check_vp8dsp(void)
Definition: vp8dsp.c:507
call_new
#define call_new(...)
Definition: checkasm.h:193
NULL
#define NULL
Definition: coverity.c:32
VP8DSPContext::vp8_h_loop_filter16y
void(* vp8_h_loop_filter16y)(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
Definition: vp8dsp.h:50
VP8DSPContext::put_vp8_bilinear_pixels_tab
vp8_mc_func put_vp8_bilinear_pixels_tab[3][3][3]
Definition: vp8dsp.h:81
VP8DSPContext::vp8_h_loop_filter16y_inner
void(* vp8_h_loop_filter16y_inner)(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
Definition: vp8dsp.h:60
VP8DSPContext::vp8_luma_dc_wht
void(* vp8_luma_dc_wht)(int16_t block[4][4][16], int16_t dc[16])
Definition: vp8dsp.h:38
setdx
#define setdx(a, b, c, d)
Definition: vp8dsp.c:330
vp8_mc_func
void(* vp8_mc_func)(uint8_t *dst, ptrdiff_t dstStride, uint8_t *src, ptrdiff_t srcStride, int h, int x, int y)
Definition: vp8dsp.h:33
VP8DSPContext
Definition: vp8dsp.h:37
setdx2
#define setdx2(a, b, o, c, d, e)
Definition: vp8dsp.c:332
dc
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff) *mv_scale Intra DC Prediction block[y][x] dc[1]
Definition: snow.txt:400
size
int size
Definition: twinvq_data.h:11134
check_luma_dc_wht
static void check_luma_dc_wht(void)
Definition: vp8dsp.c:199
VP8DSPContext::vp8_idct_dc_add
void(* vp8_idct_dc_add)(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
Definition: vp8dsp.h:41
val
const char const char void * val
Definition: avisynth_c.h:863
height
#define height
VP8DSPContext::vp8_v_loop_filter8uv_inner
void(* vp8_v_loop_filter8uv_inner)(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
Definition: vp8dsp.h:62
report
#define report
Definition: checkasm.h:123
bench_new
#define bench_new(...)
Definition: checkasm.h:253
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
VP8DSPContext::put_vp8_epel_pixels_tab
vp8_mc_func put_vp8_epel_pixels_tab[3][3][3]
first dimension: 4-log2(width) second dimension: 0 if no vertical interpolation is needed; 1 4-tap ve...
Definition: vp8dsp.h:80
common.h
check_mc
static void check_mc(void)
Definition: vp8dsp.c:265
uint8_t
uint8_t
Definition: audio_convert.c:194
avcodec.h
dct4x4
static void dct4x4(int16_t *coef)
Definition: vp8dsp.c:45
fill_loopfilter_buffers
static void fill_loopfilter_buffers(uint8_t *buf, ptrdiff_t stride, int w, int h)
Definition: vp8dsp.c:368
VP8DSPContext::vp8_idct_dc_add4uv
void(* vp8_idct_dc_add4uv)(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
Definition: vp8dsp.h:44
SRC_BUF_STRIDE
#define SRC_BUF_STRIDE
Definition: vp8dsp.c:249
AV_CPU_FLAG_MMX
#define AV_CPU_FLAG_MMX
standard MMX
Definition: cpu.h:31
check_idct
static void check_idct(void)
Definition: vp8dsp.c:111
VP8DSPContext::vp8_idct_dc_add4y
void(* vp8_idct_dc_add4y)(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
Definition: vp8dsp.h:42
check_loopfilter_16y
static void check_loopfilter_16y(void)
Definition: vp8dsp.c:379
VP8DSPContext::vp8_luma_dc_wht_dc
void(* vp8_luma_dc_wht_dc)(int16_t block[4][4][16], int16_t dc[16])
Definition: vp8dsp.h:39
LOCAL_ALIGNED_16
#define LOCAL_ALIGNED_16(t, v,...)
Definition: internal.h:131
VP8DSPContext::vp8_idct_add
void(* vp8_idct_add)(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
Definition: vp8dsp.h:40
setpx
#define setpx(a, b, c)
Definition: vp8dsp.c:328
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
h
h
Definition: vp9dsp_template.c:2038
check_loopfilter_8uv
static void check_loopfilter_8uv(void)
Definition: vp8dsp.c:423
int
int
Definition: ffmpeg_filter.c:191
snprintf
#define snprintf
Definition: snprintf.h:34
block1
static int16_t block1[64]
Definition: dct.c:116
ff_vp78dsp_init
av_cold void ff_vp78dsp_init(VP8DSPContext *dsp)
Definition: vp8dsp.c:666