FFmpeg
h264dsp.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016 Martin Storsjo
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along
17  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19  */
20 
21 #include <string.h>
22 #include "checkasm.h"
23 #include "libavcodec/avcodec.h"
24 #include "libavcodec/h264dsp.h"
25 #include "libavcodec/h264data.h"
26 #include "libavcodec/h264_parse.h"
27 #include "libavutil/common.h"
28 #include "libavutil/internal.h"
29 #include "libavutil/intreadwrite.h"
30 #include "libavutil/mem_internal.h"
31 
/*
 * Masks ANDed onto each random 32-bit word before it is stored as pixel
 * data; the table index is bit_depth - 8.
 */
static const uint32_t pixel_mask[3] = {
    0xffffffff, /*  8-bit: four bytes, all bits kept              */
    0x01ff01ff, /*  9-bit: two 16-bit samples, low 9 bits kept    */
    0x03ff03ff, /* 10-bit: two 16-bit samples, low 10 bits kept   */
};

/*
 * Masks used by the loop-filter checks, indexed the same way.  Compared with
 * pixel_mask, every other sample is further limited to its low 4 bits.
 */
static const uint32_t pixel_mask_lf[3] = {
    0xff0fff0f,
    0x01ff000f,
    0x03ff000f,
};
34 
/* Distance in bytes between two rows of the small src/dst test buffers. */
#define PIXEL_STRIDE 16
/* Bytes per pixel for the bit_depth variable in scope at the point of use. */
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
/* Bytes per transform coefficient: always twice the pixel size. */
#define SIZEOF_COEF  (2 * SIZEOF_PIXEL)
38 
/*
 * Fill the first sz rows of src and dst (PIXEL_STRIDE bytes each) with masked
 * random words, then store the per-pixel difference src - dst of the top-left
 * sz x sz pixels into coef, row-major.
 *
 * Uses src, dst, coef, sz and bit_depth from the enclosing scope.  For
 * bit_depth 8 the pixels are bytes and coef is written as int16_t; otherwise
 * pixels are read as uint16_t and coef is written as int32_t.
 * Words are drawn alternately for src and dst, in ascending address order.
 */
#define randomize_buffers()                                                  \
    do {                                                                     \
        const uint32_t rb_mask = pixel_mask[bit_depth - 8];                  \
        int rb_row, rb_col;                                                  \
        for (rb_row = 0; rb_row < sz; rb_row++) {                            \
            uint8_t *const rb_src = src + rb_row * PIXEL_STRIDE;             \
            uint8_t *const rb_dst = dst + rb_row * PIXEL_STRIDE;             \
            for (rb_col = 0; rb_col < PIXEL_STRIDE; rb_col += 4) {           \
                AV_WN32A(rb_src + rb_col, rnd() & rb_mask);                  \
                AV_WN32A(rb_dst + rb_col, rnd() & rb_mask);                  \
            }                                                                \
            if (bit_depth == 8) {                                            \
                for (rb_col = 0; rb_col < sz; rb_col++)                      \
                    coef[rb_row * sz + rb_col] =                             \
                        rb_src[rb_col] - rb_dst[rb_col];                     \
            } else {                                                         \
                const uint16_t *const rb_src16 = (const uint16_t *)rb_src;   \
                const uint16_t *const rb_dst16 = (const uint16_t *)rb_dst;   \
                for (rb_col = 0; rb_col < sz; rb_col++)                      \
                    ((int32_t *)coef)[rb_row * sz + rb_col] =                \
                        rb_src16[rb_col] - rb_dst16[rb_col];                 \
            }                                                                \
        }                                                                    \
    } while (0)
60 
/**
 * Instantiate dct4x4_<size>(): an in-place forward 4x4 transform on a
 * row-major block of 16 coefficients of type dctcoef.
 *
 * Pass 1 runs a 4-point butterfly over each row of coef and stores the result
 * transposed in tmp; pass 2 does the same over each row of tmp and writes
 * back to coef.  Every coefficient is then multiplied by a constant chosen by
 * the parity of its row and column and rounded with a 15-bit shift.
 *
 * The scaling multiply is done in 64 bits, as in dct8x8_impl: with 32-bit
 * coefficients a 10-bit residual can reach 36 * 1023 = 36828 before scaling,
 * and 36828 * 83888 does not fit in an int.
 */
#define dct4x4_impl(size, dctcoef)                                           \
static void dct4x4_##size(dctcoef *coef)                                     \
{                                                                            \
    static const int scale[] = { 13107 * 10, 8066 * 13, 5243 * 16 };         \
    dctcoef tmp[16];                                                         \
    int i, y, x;                                                             \
    for (i = 0; i < 4; i++) {                                                \
        const int z0 = coef[i*4 + 0] + coef[i*4 + 3];                        \
        const int z1 = coef[i*4 + 1] + coef[i*4 + 2];                        \
        const int z2 = coef[i*4 + 0] - coef[i*4 + 3];                        \
        const int z3 = coef[i*4 + 1] - coef[i*4 + 2];                        \
        tmp[i + 4*0] =   z0 +   z1;                                          \
        tmp[i + 4*1] = 2*z2 +   z3;                                          \
        tmp[i + 4*2] =   z0 -   z1;                                          \
        tmp[i + 4*3] =   z2 - 2*z3;                                          \
    }                                                                        \
    for (i = 0; i < 4; i++) {                                                \
        const int z0 = tmp[i*4 + 0] + tmp[i*4 + 3];                          \
        const int z1 = tmp[i*4 + 1] + tmp[i*4 + 2];                          \
        const int z2 = tmp[i*4 + 0] - tmp[i*4 + 3];                          \
        const int z3 = tmp[i*4 + 1] - tmp[i*4 + 2];                          \
        coef[i*4 + 0] =   z0 +   z1;                                         \
        coef[i*4 + 1] = 2*z2 +   z3;                                         \
        coef[i*4 + 2] =   z0 -   z1;                                         \
        coef[i*4 + 3] =   z2 - 2*z3;                                         \
    }                                                                        \
    for (y = 0; y < 4; y++) {                                                \
        for (x = 0; x < 4; x++) {                                            \
            const int idx = (y & 1) + (x & 1);                               \
            coef[y*4 + x] = ((int64_t)coef[y*4 + x] *                        \
                             scale[idx] + (1 << 14)) >> 15;                  \
        }                                                                    \
    }                                                                        \
}

/**
 * One 8-point 1-D transform pass.
 *
 * Reads eight values src[0], src[srcstride], ... src[7 * srcstride] and
 * writes eight results to dst at dststride spacing.  All intermediate
 * arithmetic is done in int.
 */
#define DCT8_1D(src, srcstride, dst, dststride) do {                         \
    const int a0 = (src)[srcstride * 0] + (src)[srcstride * 7];              \
    const int a1 = (src)[srcstride * 0] - (src)[srcstride * 7];              \
    const int a2 = (src)[srcstride * 1] + (src)[srcstride * 6];              \
    const int a3 = (src)[srcstride * 1] - (src)[srcstride * 6];              \
    const int a4 = (src)[srcstride * 2] + (src)[srcstride * 5];              \
    const int a5 = (src)[srcstride * 2] - (src)[srcstride * 5];              \
    const int a6 = (src)[srcstride * 3] + (src)[srcstride * 4];              \
    const int a7 = (src)[srcstride * 3] - (src)[srcstride * 4];              \
    const int b0 = a0 + a6;                                                  \
    const int b1 = a2 + a4;                                                  \
    const int b2 = a0 - a6;                                                  \
    const int b3 = a2 - a4;                                                  \
    const int b4 = a3 + a5 + (a1 + (a1 >> 1));                               \
    const int b5 = a1 - a7 - (a5 + (a5 >> 1));                               \
    const int b6 = a1 + a7 - (a3 + (a3 >> 1));                               \
    const int b7 = a3 - a5 + (a7 + (a7 >> 1));                               \
    (dst)[dststride * 0] =  b0       +  b1;                                  \
    (dst)[dststride * 1] =  b4       + (b7 >> 2);                            \
    (dst)[dststride * 2] =  b2       + (b3 >> 1);                            \
    (dst)[dststride * 3] =  b5       + (b6 >> 2);                            \
    (dst)[dststride * 4] =  b0       -  b1;                                  \
    (dst)[dststride * 5] =  b6       - (b5 >> 2);                            \
    (dst)[dststride * 6] = (b2 >> 1) -  b3;                                  \
    (dst)[dststride * 7] = (b4 >> 2) -  b7;                                  \
} while (0)

/**
 * Instantiate dct8x8_<size>(): an in-place forward 8x8 transform on a
 * row-major block of 64 coefficients of type dctcoef.
 *
 * DCT8_1D is first applied to every column of coef (into the same column of
 * tmp), then to every row of tmp (written to a column of coef).  Each
 * coefficient is finally multiplied in 64 bits by a constant selected through
 * idxmap from its position modulo 4 and rounded with an 18-bit shift.
 */
#define dct8x8_impl(size, dctcoef)                                           \
static void dct8x8_##size(dctcoef *coef)                                     \
{                                                                            \
    static const int scale[] = {                                             \
        13107 * 20, 11428 * 18, 20972 * 32,                                  \
        12222 * 19, 16777 * 25, 15481 * 24,                                  \
    };                                                                       \
    static const int idxmap[] = {                                            \
        0, 3, 4, 3,                                                          \
        3, 1, 5, 1,                                                          \
        4, 5, 2, 5,                                                          \
        3, 1, 5, 1,                                                          \
    };                                                                       \
    dctcoef tmp[64];                                                         \
    int i, x, y;                                                             \
    for (i = 0; i < 8; i++)                                                  \
        DCT8_1D(coef + i, 8, tmp + i, 8);                                    \
                                                                             \
    for (i = 0; i < 8; i++)                                                  \
        DCT8_1D(tmp + 8*i, 1, coef + i, 8);                                  \
                                                                             \
    for (y = 0; y < 8; y++) {                                                \
        for (x = 0; x < 8; x++) {                                            \
            const int idx = idxmap[(y & 3) * 4 + (x & 3)];                   \
            coef[y*8 + x] = ((int64_t)coef[y*8 + x] *                        \
                             scale[idx] + (1 << 17)) >> 18;                  \
        }                                                                    \
    }                                                                        \
}

/* 16-bit coefficient variants serve bit depth 8, 32-bit ones the rest. */
dct4x4_impl(16, int16_t)
dct4x4_impl(32, int32_t)

dct8x8_impl(16, int16_t)
dct8x8_impl(32, int32_t)

/**
 * Forward 4x4 transform of coef in place.
 *
 * @param coef      16 coefficients; int16_t for bit_depth 8, otherwise the
 *                  buffer is reinterpreted as int32_t
 * @param bit_depth pixel bit depth selecting the coefficient width
 */
static void dct4x4(int16_t *coef, int bit_depth)
{
    if (bit_depth == 8)
        dct4x4_16(coef);
    else
        dct4x4_32((int32_t *) coef);
}

/**
 * Forward 8x8 transform of coef in place.
 *
 * @param coef      64 coefficients; int16_t for bit_depth 8, otherwise the
 *                  buffer is reinterpreted as int32_t
 * @param bit_depth pixel bit depth selecting the coefficient width
 */
static void dct8x8(int16_t *coef, int bit_depth)
{
    if (bit_depth == 8) {
        dct8x8_16(coef);
    } else {
        dct8x8_32((int32_t *) coef);
    }
}
174 
175 
176 static void check_idct(void)
177 {
178  LOCAL_ALIGNED_16(uint8_t, src, [8 * 8 * 2]);
179  LOCAL_ALIGNED_16(uint8_t, dst, [8 * 8 * 2]);
180  LOCAL_ALIGNED_16(uint8_t, dst0, [8 * 8 * 2]);
181  LOCAL_ALIGNED_16(uint8_t, dst1_base, [8 * 8 * 2 + 32]);
182  LOCAL_ALIGNED_16(int16_t, coef, [8 * 8 * 2]);
183  LOCAL_ALIGNED_16(int16_t, subcoef0, [8 * 8 * 2]);
184  LOCAL_ALIGNED_16(int16_t, subcoef1, [8 * 8 * 2]);
186  int bit_depth, sz, align, dc;
187  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *block, int stride);
188 
189  for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
191  for (sz = 4; sz <= 8; sz += 4) {
193 
194  if (sz == 4)
195  dct4x4(coef, bit_depth);
196  else
197  dct8x8(coef, bit_depth);
198 
199  for (dc = 0; dc <= 1; dc++) {
200  void (*idct)(uint8_t *, int16_t *, int) = NULL;
201  switch ((sz << 1) | dc) {
202  case (4 << 1) | 0: idct = h.h264_idct_add; break;
203  case (4 << 1) | 1: idct = h.h264_idct_dc_add; break;
204  case (8 << 1) | 0: idct = h.h264_idct8_add; break;
205  case (8 << 1) | 1: idct = h.h264_idct8_dc_add; break;
206  }
207  if (check_func(idct, "h264_idct%d_add%s_%dbpp", sz, dc ? "_dc" : "", bit_depth)) {
208  for (align = 0; align < 16; align += sz * SIZEOF_PIXEL) {
209  uint8_t *dst1 = dst1_base + align;
210  if (dc) {
211  memset(subcoef0, 0, sz * sz * SIZEOF_COEF);
212  memcpy(subcoef0, coef, SIZEOF_COEF);
213  } else {
214  memcpy(subcoef0, coef, sz * sz * SIZEOF_COEF);
215  }
216  memcpy(dst0, dst, sz * PIXEL_STRIDE);
217  memcpy(dst1, dst, sz * PIXEL_STRIDE);
218  memcpy(subcoef1, subcoef0, sz * sz * SIZEOF_COEF);
219  call_ref(dst0, subcoef0, PIXEL_STRIDE);
220  call_new(dst1, subcoef1, PIXEL_STRIDE);
221  if (memcmp(dst0, dst1, sz * PIXEL_STRIDE) ||
222  memcmp(subcoef0, subcoef1, sz * sz * SIZEOF_COEF))
223  fail();
224  bench_new(dst1, subcoef1, sz * SIZEOF_PIXEL);
225  }
226  }
227  }
228  }
229  }
230 }
231 
232 static void check_idct_multiple(void)
233 {
234  LOCAL_ALIGNED_16(uint8_t, dst_full, [16 * 16 * 2]);
235  LOCAL_ALIGNED_16(int16_t, coef_full, [16 * 16 * 2]);
236  LOCAL_ALIGNED_16(uint8_t, dst0, [16 * 16 * 2]);
237  LOCAL_ALIGNED_16(uint8_t, dst1, [16 * 16 * 2]);
238  LOCAL_ALIGNED_16(int16_t, coef0, [16 * 16 * 2]);
239  LOCAL_ALIGNED_16(int16_t, coef1, [16 * 16 * 2]);
240  LOCAL_ALIGNED_16(uint8_t, nnzc, [15 * 8]);
242  int bit_depth, i, y, func;
243  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]);
244 
245  for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
247  for (func = 0; func < 3; func++) {
248  void (*idct)(uint8_t *, const int *, int16_t *, int, const uint8_t[]) = NULL;
249  const char *name;
250  int sz = 4, intra = 0;
251  int block_offset[16] = { 0 };
252  switch (func) {
253  case 0:
254  idct = h.h264_idct_add16;
255  name = "h264_idct_add16";
256  break;
257  case 1:
258  idct = h.h264_idct_add16intra;
259  name = "h264_idct_add16intra";
260  intra = 1;
261  break;
262  case 2:
263  idct = h.h264_idct8_add4;
264  name = "h264_idct8_add4";
265  sz = 8;
266  break;
267  }
268  memset(nnzc, 0, 15 * 8);
269  memset(coef_full, 0, 16 * 16 * SIZEOF_COEF);
270  for (i = 0; i < 16 * 16; i += sz * sz) {
271  uint8_t src[8 * 8 * 2];
272  uint8_t dst[8 * 8 * 2];
273  int16_t coef[8 * 8 * 2];
274  int index = i / sz;
275  int block_y = (index / 16) * sz;
276  int block_x = index % 16;
277  int offset = (block_y * 16 + block_x) * SIZEOF_PIXEL;
278  int nnz = rnd() % 3;
279 
281  if (sz == 4)
282  dct4x4(coef, bit_depth);
283  else
284  dct8x8(coef, bit_depth);
285 
286  for (y = 0; y < sz; y++)
287  memcpy(&dst_full[offset + y * 16 * SIZEOF_PIXEL],
288  &dst[PIXEL_STRIDE * y], sz * SIZEOF_PIXEL);
289 
290  if (nnz > 1)
291  nnz = sz * sz;
292  memcpy(&coef_full[i * SIZEOF_COEF/sizeof(coef[0])],
293  coef, nnz * SIZEOF_COEF);
294 
295  if (intra && nnz == 1)
296  nnz = 0;
297 
298  nnzc[scan8[i / 16]] = nnz;
299  block_offset[i / 16] = offset;
300  }
301 
302  if (check_func(idct, "%s_%dbpp", name, bit_depth)) {
303  memcpy(coef0, coef_full, 16 * 16 * SIZEOF_COEF);
304  memcpy(coef1, coef_full, 16 * 16 * SIZEOF_COEF);
305  memcpy(dst0, dst_full, 16 * 16 * SIZEOF_PIXEL);
306  memcpy(dst1, dst_full, 16 * 16 * SIZEOF_PIXEL);
307  call_ref(dst0, block_offset, coef0, 16 * SIZEOF_PIXEL, nnzc);
308  call_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc);
309  if (memcmp(dst0, dst1, 16 * 16 * SIZEOF_PIXEL) ||
310  memcmp(coef0, coef1, 16 * 16 * SIZEOF_COEF))
311  fail();
312  bench_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc);
313  }
314  }
315  }
316 }
317 
318 
319 static void check_loop_filter(void)
320 {
321  LOCAL_ALIGNED_16(uint8_t, dst, [32 * 16 * 2]);
322  LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 16 * 2]);
323  LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 16 * 2]);
325  int bit_depth;
326  int alphas[36], betas[36];
327  int8_t tc0[36][4];
328 
329  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, ptrdiff_t stride,
330  int alpha, int beta, int8_t *tc0);
331 
332  for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
333  int i, j, a, c;
334  uint32_t mask = pixel_mask_lf[bit_depth - 8];
336  for (i = 35, a = 255, c = 250; i >= 0; i--) {
337  alphas[i] = a << (bit_depth - 8);
338  betas[i] = (i + 1) / 2 << (bit_depth - 8);
339  tc0[i][0] = tc0[i][3] = (c + 6) / 10;
340  tc0[i][1] = (c + 7) / 15;
341  tc0[i][2] = (c + 9) / 20;
342  a = a*9/10;
343  c = c*9/10;
344  }
345 
346 #define CHECK_LOOP_FILTER(name, align, idc) \
347  do { \
348  if (check_func(h.name, #name #idc "_%dbpp", bit_depth)) { \
349  for (j = 0; j < 36; j++) { \
350  intptr_t off = 8 * 32 + (j & 15) * 4 * !align; \
351  for (i = 0; i < 1024; i+=4) { \
352  AV_WN32A(dst + i, rnd() & mask); \
353  } \
354  memcpy(dst0, dst, 32 * 16 * 2); \
355  memcpy(dst1, dst, 32 * 16 * 2); \
356  \
357  call_ref(dst0 + off, 32, alphas[j], betas[j], tc0[j]); \
358  call_new(dst1 + off, 32, alphas[j], betas[j], tc0[j]); \
359  if (memcmp(dst0, dst1, 32 * 16 * SIZEOF_PIXEL)) { \
360  fprintf(stderr, #name #idc ": j:%d, alpha:%d beta:%d " \
361  "tc0:{%d,%d,%d,%d}\n", j, alphas[j], betas[j], \
362  tc0[j][0], tc0[j][1], tc0[j][2], tc0[j][3]); \
363  fail(); \
364  } \
365  bench_new(dst1, 32, alphas[j], betas[j], tc0[j]); \
366  } \
367  } \
368  } while (0)
369 
376 
380 #undef CHECK_LOOP_FILTER
381  }
382 }
383 
384 static void check_loop_filter_intra(void)
385 {
386  LOCAL_ALIGNED_16(uint8_t, dst, [32 * 16 * 2]);
387  LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 16 * 2]);
388  LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 16 * 2]);
390  int bit_depth;
391  int alphas[36], betas[36];
392 
393  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, ptrdiff_t stride,
394  int alpha, int beta);
395 
396  for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
397  int i, j, a;
398  uint32_t mask = pixel_mask_lf[bit_depth - 8];
400  for (i = 35, a = 255; i >= 0; i--) {
401  alphas[i] = a << (bit_depth - 8);
402  betas[i] = (i + 1) / 2 << (bit_depth - 8);
403  a = a*9/10;
404  }
405 
406 #define CHECK_LOOP_FILTER(name, align, idc) \
407  do { \
408  if (check_func(h.name, #name #idc "_%dbpp", bit_depth)) { \
409  for (j = 0; j < 36; j++) { \
410  intptr_t off = 8 * 32 + (j & 15) * 4 * !align; \
411  for (i = 0; i < 1024; i+=4) { \
412  AV_WN32A(dst + i, rnd() & mask); \
413  } \
414  memcpy(dst0, dst, 32 * 16 * 2); \
415  memcpy(dst1, dst, 32 * 16 * 2); \
416  \
417  call_ref(dst0 + off, 32, alphas[j], betas[j]); \
418  call_new(dst1 + off, 32, alphas[j], betas[j]); \
419  if (memcmp(dst0, dst1, 32 * 16 * SIZEOF_PIXEL)) { \
420  fprintf(stderr, #name #idc ": j:%d, alpha:%d beta:%d\n", \
421  j, alphas[j], betas[j]); \
422  fail(); \
423  } \
424  bench_new(dst1, 32, alphas[j], betas[j]); \
425  } \
426  } \
427  } while (0)
428 
435 
439 #undef CHECK_LOOP_FILTER
440  }
441 }
442 
/**
 * checkasm entry point for the H.264 DSP tests.
 *
 * Runs each group of checks and emits one report() line per group; the two
 * IDCT checks share the "idct" report.
 */
void checkasm_check_h264dsp(void)
{
    check_idct();
    check_idct_multiple();
    report("idct");

    check_loop_filter();
    report("loop_filter");

    check_loop_filter_intra();
    report("loop_filter_intra");
}
CHECK_LOOP_FILTER
#define CHECK_LOOP_FILTER(name, align, idc)
func
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:68
declare_func_emms
#define declare_func_emms(cpu_flags, ret,...)
Definition: checkasm.h:135
name
it's the only field you need to keep assuming you have a context. There is some magic you don't need to care about around this, just let it vf default minimum maximum flags name is the option name
Definition: writing_filters.txt:88
mem_internal.h
check_idct
static void check_idct(void)
Definition: h264dsp.c:176
h264_parse.h
SIZEOF_PIXEL
#define SIZEOF_PIXEL
Definition: h264dsp.c:35
check_func
#define check_func(func,...)
Definition: checkasm.h:129
h264_v_loop_filter_luma_intra
static void FUNCC() h264_v_loop_filter_luma_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
Definition: h264dsp_template.c:218
call_ref
#define call_ref(...)
Definition: checkasm.h:144
bit_depth
static void bit_depth(AudioStatsContext *s, const uint64_t *const mask, uint8_t *depth)
Definition: af_astats.c:245
h264_h_loop_filter_chroma_mbaff_intra
static void FUNCC() h264_h_loop_filter_chroma_mbaff_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
Definition: h264dsp_template.c:317
fail
#define fail()
Definition: checkasm.h:138
h264_h_loop_filter_luma_mbaff_intra
static void FUNCC() h264_h_loop_filter_luma_mbaff_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
Definition: h264dsp_template.c:226
checkasm.h
scan8
static const uint8_t scan8[16 *3+3]
Definition: h264_parse.h:40
idct
static void idct(int16_t block[64])
Definition: 4xm.c:166
h264_h_loop_filter_chroma_mbaff
static void FUNCC() h264_h_loop_filter_chroma_mbaff(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_template.c:272
h264_h_loop_filter_luma_intra
static void FUNCC() h264_h_loop_filter_luma_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
Definition: h264dsp_template.c:222
rnd
#define rnd()
Definition: checkasm.h:122
checkasm_check_h264dsp
void checkasm_check_h264dsp(void)
Definition: h264dsp.c:443
mask
static const uint16_t mask[17]
Definition: lzw.c:38
intreadwrite.h
LOCAL_ALIGNED_16
#define LOCAL_ALIGNED_16(t, v,...)
Definition: mem_internal.h:129
h264data.h
call_new
#define call_new(...)
Definition: checkasm.h:226
NULL
#define NULL
Definition: coverity.c:32
pixel_mask
static const uint32_t pixel_mask[3]
Definition: h264dsp.c:32
h264dsp.h
index
int index
Definition: gxfenc.c:89
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
dct8x8
static void dct8x8(int16_t *coef, int bit_depth)
Definition: h264dsp.c:166
dct8x8_impl
#define dct8x8_impl(size, dctcoef)
Definition: h264dsp.c:122
H264DSPContext
Context for storing H.264 DSP functions.
Definition: h264dsp.h:42
dc
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff) *mv_scale Intra DC Prediction block[y][x] dc[1]
Definition: snow.txt:400
h264_v_loop_filter_chroma_intra
static void FUNCC() h264_v_loop_filter_chroma_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
Definition: h264dsp_template.c:309
align
static const uint8_t *BS_FUNC() align(BSCTX *bc)
Skip bits to a byte boundary.
Definition: bitstream_template.h:411
dct4x4_impl
#define dct4x4_impl(size, dctcoef)
Definition: h264dsp.c:61
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
offset
it's the only field you need to keep assuming you have a context. There is some magic you don't need to care about around this, just let it vf offset
Definition: writing_filters.txt:86
check_idct_multiple
static void check_idct_multiple(void)
Definition: h264dsp.c:232
h264_h_loop_filter_luma
static void FUNCC() h264_h_loop_filter_luma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_template.c:156
check_loop_filter_intra
static void check_loop_filter_intra(void)
Definition: h264dsp.c:384
report
#define report
Definition: checkasm.h:141
check_loop_filter
static void check_loop_filter(void)
Definition: h264dsp.c:319
SIZEOF_COEF
#define SIZEOF_COEF
Definition: h264dsp.c:36
bench_new
#define bench_new(...)
Definition: checkasm.h:291
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:255
internal.h
PIXEL_STRIDE
#define PIXEL_STRIDE
Definition: h264dsp.c:37
common.h
avcodec.h
stride
#define stride
Definition: h264pred_template.c:537
dct4x4
static void dct4x4(int16_t *coef)
Definition: vp8dsp.c:46
ff_h264dsp_init
av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
Definition: h264dsp.c:66
randomize_buffers
#define randomize_buffers()
Definition: h264dsp.c:39
AV_CPU_FLAG_MMX
#define AV_CPU_FLAG_MMX
standard MMX
Definition: cpu.h:29
h264_v_loop_filter_chroma
static void FUNCC() h264_v_loop_filter_chroma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_template.c:264
h264_h_loop_filter_chroma
static void FUNCC() h264_h_loop_filter_chroma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_template.c:268
h264_h_loop_filter_chroma_intra
static void FUNCC() h264_h_loop_filter_chroma_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
Definition: h264dsp_template.c:313
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
int32_t
int32_t
Definition: audioconvert.c:56
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
h264_h_loop_filter_luma_mbaff
static void FUNCC() h264_h_loop_filter_luma_mbaff(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_template.c:160
h
h
Definition: vp9dsp_template.c:2038
int
int
Definition: ffmpeg_filter.c:368
h264_v_loop_filter_luma
static void FUNCC() h264_v_loop_filter_luma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_template.c:152
pixel_mask_lf
static const uint32_t pixel_mask_lf[3]
Definition: h264dsp.c:33