FFmpeg
h264dsp.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016 Martin Storsjo
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along
17  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19  */
20 
21 #include <string.h>
22 #include "checkasm.h"
23 #include "libavcodec/h264dsp.h"
24 #include "libavcodec/h264data.h"
25 #include "libavcodec/h264idct.h"
26 #include "libavcodec/h264_parse.h"
27 #include "libavutil/common.h"
28 #include "libavutil/intreadwrite.h"
29 #include "libavutil/mem_internal.h"
30 
/*
 * Masks ANDed onto each random 32-bit word when filling pixel buffers.
 * Entry 0 keeps all four bytes; the remaining entries keep 9, 10, 12 and 14
 * bits in each 16-bit half of the word.
 */
static const uint32_t pixel_mask[5] = {
    0xffffffff,
    0x01ff01ff,
    0x03ff03ff,
    0x0fff0fff,
    0x3fff3fff,
};

/*
 * Masks for the loop-filter tests, indexed by bit_depth - 8.  Part of every
 * word is limited to the low four bits (0x0f / 0x000f).
 * NOTE(review): presumably this keeps neighbouring samples close enough for
 * the filter thresholds to trigger - confirm.
 */
static const uint32_t pixel_mask_lf[3] = {
    0xff0fff0f,
    0x01ff000f,
    0x03ff000f,
};

/* Bytes per row in the small per-block pixel buffers. */
#define PIXEL_STRIDE 16
/* Both size helpers read a variable called bit_depth from the using scope. */
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
#define SIZEOF_COEF (2 * SIZEOF_PIXEL)
37 
/*
 * Fill the caller's src and dst pixel buffers with masked random data and
 * store their per-sample difference in coef.
 *
 * Expands in place and uses these names from the calling scope: src, dst
 * (byte buffers, PIXEL_STRIDE bytes per row), coef, sz (block width and
 * height) and bit_depth.  idx selects the pixel_mask entry.  At 8 bpp the
 * difference is stored through coef as-is; otherwise src/dst are read as
 * uint16_t and coef is written as int32_t.
 */
#define randomize_buffers(idx)                                               \
    do {                                                                     \
        const uint32_t rb_mask = pixel_mask[(idx)];                          \
        int rb_row, rb_col;                                                  \
        for (rb_row = 0; rb_row < sz; rb_row++) {                            \
            /* randomize the whole row, one 32-bit word at a time */         \
            for (rb_col = 0; rb_col < PIXEL_STRIDE; rb_col += 4) {           \
                AV_WN32A(src + rb_row * PIXEL_STRIDE + rb_col,               \
                         rnd() & rb_mask);                                   \
                AV_WN32A(dst + rb_row * PIXEL_STRIDE + rb_col,               \
                         rnd() & rb_mask);                                   \
            }                                                                \
            /* residual = src - dst for the sz leftmost samples */           \
            for (rb_col = 0; rb_col < sz; rb_col++) {                        \
                if (bit_depth != 8) {                                        \
                    ((int32_t *)coef)[rb_row * sz + rb_col] =                \
                        ((uint16_t *)src)[rb_row * (PIXEL_STRIDE/2) + rb_col] - \
                        ((uint16_t *)dst)[rb_row * (PIXEL_STRIDE/2) + rb_col];  \
                } else {                                                     \
                    coef[rb_row * sz + rb_col] =                             \
                        src[rb_row * PIXEL_STRIDE + rb_col] -                \
                        dst[rb_row * PIXEL_STRIDE + rb_col];                 \
                }                                                            \
            }                                                                \
        }                                                                    \
    } while (0)
59 
/*
 * Define dct4x4_<size>(): an in-place forward 4x4 integer transform over 16
 * coefficients of type dctcoef, followed by a per-position fixed-point
 * scaling (multiply, add 1 << 14, shift right by 15).
 * NOTE(review): the scale factors are presumably chosen so that the inverse
 * transforms under test reproduce the original residual - confirm.
 */
#define dct4x4_impl(size, dctcoef)                                           \
static void dct4x4_##size(dctcoef *coef)                                     \
{                                                                            \
    const int64_t scale_tab[3] = { 13107 * 10, 8066 * 13, 5243 * 16 };       \
    dctcoef tmp[16];                                                         \
    int n, pos;                                                              \
    /* pass 1: transform each run of four, storing the result transposed */ \
    for (n = 0; n < 4; n++) {                                                \
        const dctcoef *in = coef + 4 * n;                                    \
        const int sum03 = in[0] + in[3];                                     \
        const int sum12 = in[1] + in[2];                                     \
        const int dif03 = in[0] - in[3];                                     \
        const int dif12 = in[1] - in[2];                                     \
        tmp[n +  0] = sum03 + sum12;                                         \
        tmp[n +  4] = 2*dif03 + dif12;                                       \
        tmp[n +  8] = sum03 - sum12;                                         \
        tmp[n + 12] = dif03 - 2*dif12;                                       \
    }                                                                        \
    /* pass 2: same butterfly on the intermediate, written back to coef */   \
    for (n = 0; n < 4; n++) {                                                \
        const dctcoef *in  = tmp  + 4 * n;                                   \
        dctcoef       *out = coef + 4 * n;                                   \
        const int sum03 = in[0] + in[3];                                     \
        const int sum12 = in[1] + in[2];                                     \
        const int dif03 = in[0] - in[3];                                     \
        const int dif12 = in[1] - in[2];                                     \
        out[0] = sum03 + sum12;                                              \
        out[1] = 2*dif03 + dif12;                                            \
        out[2] = sum03 - sum12;                                              \
        out[3] = dif03 - 2*dif12;                                            \
    }                                                                        \
    /* scaling: the factor depends on how many of (row, col) are odd */      \
    for (pos = 0; pos < 16; pos++) {                                         \
        const int row   = pos >> 2;                                          \
        const int col   = pos & 3;                                           \
        const int which = (row & 1) + (col & 1);                             \
        coef[pos] = (coef[pos] * scale_tab[which] + (1 << 14)) >> 15;        \
    }                                                                        \
}
93 
/*
 * One 8-point forward transform pass.  Reads eight values from src spaced
 * srcstride elements apart and writes eight results to dst spaced dststride
 * elements apart.  All inputs are read before any output is written.
 */
#define DCT8_1D(src, srcstride, dst, dststride) do {                         \
    /* sums and differences of the mirrored pairs (0,7) (1,6) (2,5) (3,4) */ \
    const int sum07 = (src)[srcstride * 0] + (src)[srcstride * 7];           \
    const int sum16 = (src)[srcstride * 1] + (src)[srcstride * 6];           \
    const int sum25 = (src)[srcstride * 2] + (src)[srcstride * 5];           \
    const int sum34 = (src)[srcstride * 3] + (src)[srcstride * 4];           \
    const int dif07 = (src)[srcstride * 0] - (src)[srcstride * 7];           \
    const int dif16 = (src)[srcstride * 1] - (src)[srcstride * 6];           \
    const int dif25 = (src)[srcstride * 2] - (src)[srcstride * 5];           \
    const int dif34 = (src)[srcstride * 3] - (src)[srcstride * 4];           \
    /* even half, built from the sums */                                     \
    const int ev0 = sum07 + sum34;                                           \
    const int ev1 = sum16 + sum25;                                           \
    const int ev2 = sum07 - sum34;                                           \
    const int ev3 = sum16 - sum25;                                           \
    /* odd half, built from the differences */                               \
    const int od0 = dif16 + dif25 + (dif07 + (dif07 >> 1));                  \
    const int od1 = dif07 - dif34 - (dif25 + (dif25 >> 1));                  \
    const int od2 = dif07 + dif34 - (dif16 + (dif16 >> 1));                  \
    const int od3 = dif16 - dif25 + (dif34 + (dif34 >> 1));                  \
    (dst)[dststride * 0] = ev0 + ev1;                                        \
    (dst)[dststride * 4] = ev0 - ev1;                                        \
    (dst)[dststride * 2] = ev2 + (ev3 >> 1);                                 \
    (dst)[dststride * 6] = (ev2 >> 1) - ev3;                                 \
    (dst)[dststride * 1] = od0 + (od3 >> 2);                                 \
    (dst)[dststride * 3] = od1 + (od2 >> 2);                                 \
    (dst)[dststride * 5] = od2 - (od1 >> 2);                                 \
    (dst)[dststride * 7] = (od0 >> 2) - od3;                                 \
} while (0)
120 
/*
 * Define dct8x8_<size>(): an in-place forward 8x8 transform over 64
 * coefficients of type dctcoef.  Two DCT8_1D passes are followed by a
 * per-position fixed-point scaling (multiply, add 1 << 17, shift right 18).
 */
#define dct8x8_impl(size, dctcoef)                                           \
static void dct8x8_##size(dctcoef *coef)                                     \
{                                                                            \
    static const int scale_tab[] = {                                         \
        13107 * 20, 11428 * 18, 20972 * 32,                                  \
        12222 * 19, 16777 * 25, 15481 * 24,                                  \
    };                                                                       \
    /* scale_tab index for each (row & 3, col & 3) position */               \
    static const int scale_idx[] = {                                         \
        0, 3, 4, 3,                                                          \
        3, 1, 5, 1,                                                          \
        4, 5, 2, 5,                                                          \
        3, 1, 5, 1,                                                          \
    };                                                                       \
    dctcoef tmp[64];                                                         \
    int n, pos;                                                              \
    /* pass 1: stride-8 input from coef, stride-8 output into tmp */         \
    for (n = 0; n < 8; n++)                                                  \
        DCT8_1D(coef + n, 8, tmp + n, 8);                                    \
                                                                             \
    /* pass 2: contiguous input from tmp, stride-8 output into coef */       \
    for (n = 0; n < 8; n++)                                                  \
        DCT8_1D(tmp + 8*n, 1, coef + n, 8);                                  \
                                                                             \
    for (pos = 0; pos < 64; pos++) {                                         \
        const int row   = pos >> 3;                                          \
        const int col   = pos & 7;                                           \
        const int which = scale_idx[(row & 3) * 4 + (col & 3)];              \
        coef[pos] = ((int64_t)coef[pos] *                                    \
                     scale_tab[which] + (1 << 17)) >> 18;                    \
    }                                                                        \
}
150 
151 dct4x4_impl(16, int16_t)
152 dct4x4_impl(32, int32_t)
153 
154 dct8x8_impl(16, int16_t)
155 dct8x8_impl(32, int32_t)
156 
/*
 * Forward 4x4 transform of coef in place.  At 8 bits per sample the buffer
 * holds int16_t coefficients; at any other depth the same buffer is
 * reinterpreted as int32_t coefficients.
 */
static void dct4x4(int16_t *coef, int bit_depth)
{
    if (bit_depth != 8) {
        dct4x4_32((int32_t *) coef);
        return;
    }
    dct4x4_16(coef);
}
164 
/*
 * Forward 8x8 transform of coef in place.  At 8 bits per sample the buffer
 * holds int16_t coefficients; at any other depth the same buffer is
 * reinterpreted as int32_t coefficients.
 */
static void dct8x8(int16_t *coef, int bit_depth)
{
    if (bit_depth != 8) {
        dct8x8_32((int32_t *) coef);
        return;
    }
    dct8x8_16(coef);
}
173 
174 
175 static void check_idct(void)
176 {
177  static const int depths[5] = { 8, 9, 10, 12, 14 };
178  LOCAL_ALIGNED_16(uint8_t, src, [8 * 8 * 2]);
179  LOCAL_ALIGNED_16(uint8_t, dst, [8 * 8 * 2]);
180  LOCAL_ALIGNED_16(uint8_t, dst0, [8 * 8 * 2]);
181  LOCAL_ALIGNED_16(uint8_t, dst1_base, [8 * 8 * 2 + 32]);
182  LOCAL_ALIGNED_16(int16_t, coef, [8 * 8 * 2]);
183  LOCAL_ALIGNED_16(int16_t, subcoef0, [8 * 8 * 2]);
184  LOCAL_ALIGNED_16(int16_t, subcoef1, [8 * 8 * 2]);
186  int bit_depth, sz, align, dc, i;
187  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *block, int stride);
188 
189  for (i = 0; i < FF_ARRAY_ELEMS(depths); i++) {
190  bit_depth = depths[i];
192 
193  for (dc = 0; dc <= 2; dc++) {
194  for (sz = 4; sz <= 8; sz += 4) {
195  void (*idct)(uint8_t *, int16_t *, int) = NULL;
196  const char fmts[3][28] = {
197  "h264_idct%d_add_%dbpp", "h264_idct%d_dc_add_%dbpp",
198  "h264_add_pixels%d_%dbpp",
199  };
200 
202 
203  if (sz == 4)
204  dct4x4(coef, bit_depth);
205  else
206  dct8x8(coef, bit_depth);
207 
208  switch ((sz << 2) | dc) {
209  case (4 << 2) | 0: idct = h.h264_idct_add; break;
210  case (4 << 2) | 1: idct = h.h264_idct_dc_add; break;
211  case (4 << 2) | 2: idct = h.h264_add_pixels4_clear; break;
212  case (8 << 2) | 0: idct = h.h264_idct8_add; break;
213  case (8 << 2) | 1: idct = h.h264_idct8_dc_add; break;
214  case (8 << 2) | 2: idct = h.h264_add_pixels8_clear; break;
215  }
216 
217  if (check_func(idct, fmts[dc], sz, bit_depth)) {
218  for (align = 0; align < 16; align += sz * SIZEOF_PIXEL) {
219  uint8_t *dst1 = dst1_base + align;
220  if (dc) {
221  memset(subcoef0, 0, sz * sz * SIZEOF_COEF);
222  memcpy(subcoef0, coef, SIZEOF_COEF);
223  } else {
224  memcpy(subcoef0, coef, sz * sz * SIZEOF_COEF);
225  }
226  memcpy(dst0, dst, sz * PIXEL_STRIDE);
227  memcpy(dst1, dst, sz * PIXEL_STRIDE);
228  memcpy(subcoef1, subcoef0, sz * sz * SIZEOF_COEF);
229  call_ref(dst0, subcoef0, PIXEL_STRIDE);
230  call_new(dst1, subcoef1, PIXEL_STRIDE);
231  if (memcmp(dst0, dst1, sz * PIXEL_STRIDE) ||
232  memcmp(subcoef0, subcoef1, sz * sz * SIZEOF_COEF))
233  fail();
234  bench_new(dst1, subcoef1, sz * SIZEOF_PIXEL);
235  }
236  }
237  }
238  }
239  }
240 }
241 
242 static void check_idct_multiple(void)
243 {
244  LOCAL_ALIGNED_16(uint8_t, dst_full, [16 * 16 * 2]);
245  LOCAL_ALIGNED_16(int16_t, coef_full, [16 * 16 * 2]);
246  LOCAL_ALIGNED_16(uint8_t, dst0, [16 * 16 * 2]);
247  LOCAL_ALIGNED_16(uint8_t, dst1, [16 * 16 * 2]);
248  LOCAL_ALIGNED_16(int16_t, coef0, [16 * 16 * 2]);
249  LOCAL_ALIGNED_16(int16_t, coef1, [16 * 16 * 2]);
250  LOCAL_ALIGNED_16(uint8_t, nnzc, [15 * 8]);
252  int bit_depth, i, y, func;
253  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]);
254 
255  for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
257  for (func = 0; func < 3; func++) {
258  void (*idct)(uint8_t *, const int *, int16_t *, int, const uint8_t[]) = NULL;
259  const char *name;
260  int sz = 4, intra = 0;
261  int block_offset[16] = { 0 };
262  switch (func) {
263  case 0:
264  idct = h.h264_idct_add16;
265  name = "h264_idct_add16";
266  break;
267  case 1:
268  idct = h.h264_idct_add16intra;
269  name = "h264_idct_add16intra";
270  intra = 1;
271  break;
272  case 2:
273  idct = h.h264_idct8_add4;
274  name = "h264_idct8_add4";
275  sz = 8;
276  break;
277  }
278  memset(nnzc, 0, 15 * 8);
279  memset(coef_full, 0, 16 * 16 * SIZEOF_COEF);
280  for (i = 0; i < 16 * 16; i += sz * sz) {
281  uint8_t src[8 * 8 * 2];
282  uint8_t dst[8 * 8 * 2];
283  int16_t coef[8 * 8 * 2];
284  int index = i / sz;
285  int block_y = (index / 16) * sz;
286  int block_x = index % 16;
287  int offset = (block_y * 16 + block_x) * SIZEOF_PIXEL;
288  int nnz = rnd() % 3;
289 
291  if (sz == 4)
292  dct4x4(coef, bit_depth);
293  else
294  dct8x8(coef, bit_depth);
295 
296  for (y = 0; y < sz; y++)
297  memcpy(&dst_full[offset + y * 16 * SIZEOF_PIXEL],
298  &dst[PIXEL_STRIDE * y], sz * SIZEOF_PIXEL);
299 
300  if (nnz > 1)
301  nnz = sz * sz;
302  memcpy(&coef_full[i * SIZEOF_COEF/sizeof(coef[0])],
303  coef, nnz * SIZEOF_COEF);
304 
305  if (intra && nnz == 1)
306  nnz = 0;
307 
308  nnzc[scan8[i / 16]] = nnz;
309  block_offset[i / 16] = offset;
310  }
311 
312  if (check_func(idct, "%s_%dbpp", name, bit_depth)) {
313  memcpy(coef0, coef_full, 16 * 16 * SIZEOF_COEF);
314  memcpy(coef1, coef_full, 16 * 16 * SIZEOF_COEF);
315  memcpy(dst0, dst_full, 16 * 16 * SIZEOF_PIXEL);
316  memcpy(dst1, dst_full, 16 * 16 * SIZEOF_PIXEL);
317  call_ref(dst0, block_offset, coef0, 16 * SIZEOF_PIXEL, nnzc);
318  call_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc);
319  if (memcmp(dst0, dst1, 16 * 16 * SIZEOF_PIXEL) ||
320  memcmp(coef0, coef1, 16 * 16 * SIZEOF_COEF))
321  fail();
322  bench_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc);
323  }
324  }
325  }
326 }
327 
328 static void check_idct_dequant(void)
329 {
330  static const int depths[5] = { 8, 9, 10, 12, 14 };
331  LOCAL_ALIGNED_16(int16_t, src16, [16]);
332  LOCAL_ALIGNED_16(int32_t, src32, [16]);
333  LOCAL_ALIGNED_16(int16_t, dst0_16, [16 * 16]);
334  LOCAL_ALIGNED_16(int16_t, dst1_16, [16 * 16]);
335  LOCAL_ALIGNED_16(int32_t, dst0_32, [16 * 16]);
336  LOCAL_ALIGNED_16(int32_t, dst1_32, [16 * 16]);
338  int bit_depth, i, qmul;
339  declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_SSE2, void, int16_t *output, int16_t *input, int qmul);
340 
341  qmul = rnd() % 4096;
342 
343  for (i = 0; i < FF_ARRAY_ELEMS(depths); i++) {
344  bit_depth = depths[i];
346 
347  void *src, *dst_ref, *dst_new;
348  if (bit_depth == 8) {
349  src = src16;
350  dst_ref = dst0_16;
351  dst_new = dst1_16;
352  for (int j = 0; j < 16; j++)
353  src16[j] = (rnd() % 512) - 256;
354  } else {
355  src = src32;
356  dst_ref = dst0_32;
357  dst_new = dst1_32;
358  for (int j = 0; j < 16; j++)
359  src32[j] = (rnd() % (1 << (bit_depth + 1))) - (1 << bit_depth);
360  }
361  memset(dst_ref, 0, 16 * 16 * SIZEOF_COEF);
362  memset(dst_new, 0, 16 * 16 * SIZEOF_COEF);
363 
364  if (check_func(h.h264_luma_dc_dequant_idct, "h264_luma_dc_dequant_idct_%d", bit_depth)) {
365 
366  call_ref(dst_ref, src, qmul);
367  call_new(dst_new, src, qmul);
368  checkasm_check_dctcoef(dst0, 16*SIZEOF_COEF, dst1, 16*SIZEOF_COEF, 16, 16, "dst");
369  bench_new(dst_new, src, qmul);
370  }
371  }
372 }
373 
374 
375 static void check_loop_filter(void)
376 {
377  LOCAL_ALIGNED_16(uint8_t, dst, [32 * 16 * 2]);
378  LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 16 * 2]);
379  LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 16 * 2]);
381  int bit_depth;
382  int alphas[36], betas[36];
383  int8_t tc0[36][4];
384 
385  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, ptrdiff_t stride,
386  int alpha, int beta, int8_t *tc0);
387 
388  for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
389  int i, j, a, c;
390  uint32_t mask = pixel_mask_lf[bit_depth - 8];
392  for (i = 35, a = 255, c = 250; i >= 0; i--) {
393  alphas[i] = a << (bit_depth - 8);
394  betas[i] = (i + 1) / 2 << (bit_depth - 8);
395  tc0[i][0] = tc0[i][3] = (c + 6) / 10;
396  tc0[i][1] = (c + 7) / 15;
397  tc0[i][2] = (c + 9) / 20;
398  a = a*9/10;
399  c = c*9/10;
400  }
401 
402 #define CHECK_LOOP_FILTER(name, align, idc) \
403  do { \
404  if (check_func(h.name, #name #idc "_%dbpp", bit_depth)) { \
405  for (j = 0; j < 36; j++) { \
406  intptr_t off = 8 * 32 + (j & 15) * 4 * !align; \
407  for (i = 0; i < 1024; i+=4) { \
408  AV_WN32A(dst + i, rnd() & mask); \
409  } \
410  memcpy(dst0, dst, 32 * 16 * 2); \
411  memcpy(dst1, dst, 32 * 16 * 2); \
412  \
413  call_ref(dst0 + off, 32, alphas[j], betas[j], tc0[j]); \
414  call_new(dst1 + off, 32, alphas[j], betas[j], tc0[j]); \
415  if (memcmp(dst0, dst1, 32 * 16 * SIZEOF_PIXEL)) { \
416  fprintf(stderr, #name #idc ": j:%d, alpha:%d beta:%d " \
417  "tc0:{%d,%d,%d,%d}\n", j, alphas[j], betas[j], \
418  tc0[j][0], tc0[j][1], tc0[j][2], tc0[j][3]); \
419  fail(); \
420  } \
421  bench_new(dst1 + off, 32, alphas[j], betas[j], tc0[j]);\
422  } \
423  } \
424  } while (0)
425 
432 
436 #undef CHECK_LOOP_FILTER
437  }
438 }
439 
440 static void check_loop_filter_intra(void)
441 {
442  LOCAL_ALIGNED_16(uint8_t, dst, [32 * 16 * 2]);
443  LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 16 * 2]);
444  LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 16 * 2]);
446  int bit_depth;
447  int alphas[36], betas[36];
448 
449  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, ptrdiff_t stride,
450  int alpha, int beta);
451 
452  for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
453  int i, j, a;
454  uint32_t mask = pixel_mask_lf[bit_depth - 8];
456  for (i = 35, a = 255; i >= 0; i--) {
457  alphas[i] = a << (bit_depth - 8);
458  betas[i] = (i + 1) / 2 << (bit_depth - 8);
459  a = a*9/10;
460  }
461 
462 #define CHECK_LOOP_FILTER(name, align, idc) \
463  do { \
464  if (check_func(h.name, #name #idc "_%dbpp", bit_depth)) { \
465  for (j = 0; j < 36; j++) { \
466  intptr_t off = 8 * 32 + (j & 15) * 4 * !align; \
467  for (i = 0; i < 1024; i+=4) { \
468  AV_WN32A(dst + i, rnd() & mask); \
469  } \
470  memcpy(dst0, dst, 32 * 16 * 2); \
471  memcpy(dst1, dst, 32 * 16 * 2); \
472  \
473  call_ref(dst0 + off, 32, alphas[j], betas[j]); \
474  call_new(dst1 + off, 32, alphas[j], betas[j]); \
475  if (memcmp(dst0, dst1, 32 * 16 * SIZEOF_PIXEL)) { \
476  fprintf(stderr, #name #idc ": j:%d, alpha:%d beta:%d\n", \
477  j, alphas[j], betas[j]); \
478  fail(); \
479  } \
480  bench_new(dst1 + off, 32, alphas[j], betas[j]); \
481  } \
482  } \
483  } while (0)
484 
491 
495 #undef CHECK_LOOP_FILTER
496  }
497 }
498 
/*
 * Entry point of the H.264 DSP checks: runs every check group in this file
 * and emits one report line per group.
 */
void checkasm_check_h264dsp(void)
{
    check_idct();
    check_idct_multiple();
    check_idct_dequant();
    report("idct");

    check_loop_filter();
    report("loop_filter");

    check_loop_filter_intra();
    report("loop_filter_intra");
}
CHECK_LOOP_FILTER
#define CHECK_LOOP_FILTER(name, align, idc)
func
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:68
declare_func_emms
#define declare_func_emms(cpu_flags, ret,...)
Definition: checkasm.h:196
name
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf default minimum maximum flags name is the option name
Definition: writing_filters.txt:88
mem_internal.h
output
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
Definition: filter_design.txt:226
check_idct
static void check_idct(void)
Definition: h264dsp.c:175
mask
int mask
Definition: mediacodecdec_common.c:154
h264_parse.h
SIZEOF_PIXEL
#define SIZEOF_PIXEL
Definition: h264dsp.c:34
check_func
#define check_func(func,...)
Definition: checkasm.h:190
h264_v_loop_filter_luma_intra
static void FUNCC() h264_v_loop_filter_luma_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
Definition: h264dsp_template.c:218
call_ref
#define call_ref(...)
Definition: checkasm.h:205
bit_depth
static void bit_depth(AudioStatsContext *s, const uint64_t *const mask, uint8_t *depth)
Definition: af_astats.c:246
pix
enum AVPixelFormat pix
Definition: ohcodec.c:55
h264_h_loop_filter_chroma_mbaff_intra
static void FUNCC() h264_h_loop_filter_chroma_mbaff_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
Definition: h264dsp_template.c:317
fail
#define fail()
Definition: checkasm.h:199
pixel_mask
static const uint32_t pixel_mask[5]
Definition: h264dsp.c:31
h264_h_loop_filter_luma_mbaff_intra
static void FUNCC() h264_h_loop_filter_luma_mbaff_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
Definition: h264dsp_template.c:226
checkasm.h
scan8
static const uint8_t scan8[16 *3+3]
Definition: h264_parse.h:40
idct
static void idct(int16_t block[64])
Definition: 4xm.c:167
h264_h_loop_filter_chroma_mbaff
static void FUNCC() h264_h_loop_filter_chroma_mbaff(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_template.c:272
h264_h_loop_filter_luma_intra
static void FUNCC() h264_h_loop_filter_luma_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
Definition: h264dsp_template.c:222
rnd
#define rnd()
Definition: checkasm.h:183
checkasm_check_h264dsp
void checkasm_check_h264dsp(void)
Definition: h264dsp.c:499
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
checkasm_check_dctcoef
#define checkasm_check_dctcoef(buf1, stride1, buf2, stride2,...)
Definition: checkasm.h:454
intreadwrite.h
LOCAL_ALIGNED_16
#define LOCAL_ALIGNED_16(t, v,...)
Definition: mem_internal.h:130
h264data.h
call_new
#define call_new(...)
Definition: checkasm.h:308
NULL
#define NULL
Definition: coverity.c:32
h264idct.h
h264dsp.h
index
int index
Definition: gxfenc.c:90
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
dct8x8
static void dct8x8(int16_t *coef, int bit_depth)
Definition: h264dsp.c:165
dct8x8_impl
#define dct8x8_impl(size, dctcoef)
Definition: h264dsp.c:121
AV_CPU_FLAG_SSE2
#define AV_CPU_FLAG_SSE2
PIV SSE2 functions.
Definition: cpu.h:35
H264DSPContext
Context for storing H.264 DSP functions.
Definition: h264dsp.h:42
dc
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff) *mv_scale Intra DC Prediction block[y][x] dc[1]
Definition: snow.txt:400
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:87
h264_v_loop_filter_chroma_intra
static void FUNCC() h264_v_loop_filter_chroma_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
Definition: h264dsp_template.c:309
align
static const uint8_t *BS_FUNC() align(BSCTX *bc)
Skip bits to a byte boundary.
Definition: bitstream_template.h:419
dct4x4_impl
#define dct4x4_impl(size, dctcoef)
Definition: h264dsp.c:60
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
check_idct_multiple
static void check_idct_multiple(void)
Definition: h264dsp.c:242
h264_h_loop_filter_luma
static void FUNCC() h264_h_loop_filter_luma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_template.c:156
input
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
Definition: filter_design.txt:172
check_loop_filter_intra
static void check_loop_filter_intra(void)
Definition: h264dsp.c:440
report
#define report
Definition: checkasm.h:202
check_loop_filter
static void check_loop_filter(void)
Definition: h264dsp.c:375
SIZEOF_COEF
#define SIZEOF_COEF
Definition: h264dsp.c:35
bench_new
#define bench_new(...)
Definition: checkasm.h:379
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
PIXEL_STRIDE
#define PIXEL_STRIDE
Definition: h264dsp.c:36
common.h
check_idct_dequant
static void check_idct_dequant(void)
Definition: h264dsp.c:328
randomize_buffers
#define randomize_buffers(idx)
Definition: h264dsp.c:38
stride
#define stride
Definition: h264pred_template.c:536
dct4x4
static void dct4x4(int16_t *coef)
Definition: vp8dsp.c:47
ff_h264dsp_init
av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
Definition: h264dsp.c:66
AV_CPU_FLAG_MMX
#define AV_CPU_FLAG_MMX
standard MMX
Definition: cpu.h:30
h264_v_loop_filter_chroma
static void FUNCC() h264_v_loop_filter_chroma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_template.c:264
h264_h_loop_filter_chroma
static void FUNCC() h264_h_loop_filter_chroma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_template.c:268
h264_h_loop_filter_chroma_intra
static void FUNCC() h264_h_loop_filter_chroma_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
Definition: h264dsp_template.c:313
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
int32_t
int32_t
Definition: audioconvert.c:56
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
h264_h_loop_filter_luma_mbaff
static void FUNCC() h264_h_loop_filter_luma_mbaff(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_template.c:160
h
h
Definition: vp9dsp_template.c:2070
h264_v_loop_filter_luma
static void FUNCC() h264_v_loop_filter_luma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_template.c:152
src
#define src
Definition: vp8dsp.c:248
pixel_mask_lf
static const uint32_t pixel_mask_lf[3]
Definition: h264dsp.c:32