FFmpeg
tx_template.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019 Lynne <dev@lynne.ee>
3  * Power of two FFT:
4  * Copyright (c) 2008 Loren Merritt
5  * Copyright (c) 2002 Fabrice Bellard
6  * Partly based on libdjbfft by D. J. Bernstein
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 /* All costabs for a type are defined here */
26 COSTABLE(16);
27 COSTABLE(32);
28 COSTABLE(64);
29 COSTABLE(128);
30 COSTABLE(256);
31 COSTABLE(512);
32 COSTABLE(1024);
33 COSTABLE(2048);
34 COSTABLE(4096);
35 COSTABLE(8192);
36 COSTABLE(16384);
37 COSTABLE(32768);
38 COSTABLE(65536);
39 COSTABLE(131072);
40 DECLARE_ALIGNED(32, FFTComplex, TX_NAME(ff_cos_53))[4];
41 
42 static FFTSample * const cos_tabs[18] = {
43  NULL,
44  NULL,
45  NULL,
46  NULL,
47  TX_NAME(ff_cos_16),
48  TX_NAME(ff_cos_32),
49  TX_NAME(ff_cos_64),
50  TX_NAME(ff_cos_128),
51  TX_NAME(ff_cos_256),
52  TX_NAME(ff_cos_512),
53  TX_NAME(ff_cos_1024),
54  TX_NAME(ff_cos_2048),
55  TX_NAME(ff_cos_4096),
56  TX_NAME(ff_cos_8192),
57  TX_NAME(ff_cos_16384),
58  TX_NAME(ff_cos_32768),
59  TX_NAME(ff_cos_65536),
60  TX_NAME(ff_cos_131072),
61 };
62 
64 {
65  int m = 1 << index;
66  double freq = 2*M_PI/m;
68  for(int i = 0; i <= m/4; i++)
69  tab[i] = RESCALE(cos(i*freq));
70  for(int i = 1; i < m/4; i++)
71  tab[m/2 - i] = tab[i];
72 }
73 
74 #define INIT_FF_COS_TABS_FUNC(index, size) \
75 static av_cold void init_cos_tabs_ ## size (void) \
76 { \
77  init_cos_tabs_idx(index); \
78 }
79 
86 INIT_FF_COS_TABS_FUNC(10, 1024)
87 INIT_FF_COS_TABS_FUNC(11, 2048)
88 INIT_FF_COS_TABS_FUNC(12, 4096)
89 INIT_FF_COS_TABS_FUNC(13, 8192)
90 INIT_FF_COS_TABS_FUNC(14, 16384)
91 INIT_FF_COS_TABS_FUNC(15, 32768)
92 INIT_FF_COS_TABS_FUNC(16, 65536)
93 INIT_FF_COS_TABS_FUNC(17, 131072)
94 
95 static av_cold void ff_init_53_tabs(void)
96 {
97  TX_NAME(ff_cos_53)[0] = (FFTComplex){ RESCALE(cos(2 * M_PI / 12)), RESCALE(cos(2 * M_PI / 12)) };
98  TX_NAME(ff_cos_53)[1] = (FFTComplex){ RESCALE(cos(2 * M_PI / 6)), RESCALE(cos(2 * M_PI / 6)) };
99  TX_NAME(ff_cos_53)[2] = (FFTComplex){ RESCALE(cos(2 * M_PI / 5)), RESCALE(sin(2 * M_PI / 5)) };
100  TX_NAME(ff_cos_53)[3] = (FFTComplex){ RESCALE(cos(2 * M_PI / 10)), RESCALE(sin(2 * M_PI / 10)) };
101 }
102 
105  { NULL },
106  { NULL },
107  { NULL },
108  { init_cos_tabs_16, AV_ONCE_INIT },
109  { init_cos_tabs_32, AV_ONCE_INIT },
110  { init_cos_tabs_64, AV_ONCE_INIT },
111  { init_cos_tabs_128, AV_ONCE_INIT },
112  { init_cos_tabs_256, AV_ONCE_INIT },
113  { init_cos_tabs_512, AV_ONCE_INIT },
114  { init_cos_tabs_1024, AV_ONCE_INIT },
115  { init_cos_tabs_2048, AV_ONCE_INIT },
116  { init_cos_tabs_4096, AV_ONCE_INIT },
117  { init_cos_tabs_8192, AV_ONCE_INIT },
118  { init_cos_tabs_16384, AV_ONCE_INIT },
119  { init_cos_tabs_32768, AV_ONCE_INIT },
120  { init_cos_tabs_65536, AV_ONCE_INIT },
121  { init_cos_tabs_131072, AV_ONCE_INIT },
122 };
123 
124 static av_cold void init_cos_tabs(int index)
125 {
128 }
129 
131  ptrdiff_t stride)
132 {
133  FFTComplex tmp[2];
134 #ifdef TX_INT32
135  int64_t mtmp[4];
136 #endif
137 
138  BF(tmp[0].re, tmp[1].im, in[1].im, in[2].im);
139  BF(tmp[0].im, tmp[1].re, in[1].re, in[2].re);
140 
141  out[0*stride].re = in[0].re + tmp[1].re;
142  out[0*stride].im = in[0].im + tmp[1].im;
143 
144 #ifdef TX_INT32
145  mtmp[0] = (int64_t)TX_NAME(ff_cos_53)[0].re * tmp[0].re;
146  mtmp[1] = (int64_t)TX_NAME(ff_cos_53)[0].im * tmp[0].im;
147  mtmp[2] = (int64_t)TX_NAME(ff_cos_53)[1].re * tmp[1].re;
148  mtmp[3] = (int64_t)TX_NAME(ff_cos_53)[1].re * tmp[1].im;
149  out[1*stride].re = in[0].re - (mtmp[2] + mtmp[0] + 0x40000000 >> 31);
150  out[1*stride].im = in[0].im - (mtmp[3] - mtmp[1] + 0x40000000 >> 31);
151  out[2*stride].re = in[0].re - (mtmp[2] - mtmp[0] + 0x40000000 >> 31);
152  out[2*stride].im = in[0].im - (mtmp[3] + mtmp[1] + 0x40000000 >> 31);
153 #else
154  tmp[0].re = TX_NAME(ff_cos_53)[0].re * tmp[0].re;
155  tmp[0].im = TX_NAME(ff_cos_53)[0].im * tmp[0].im;
156  tmp[1].re = TX_NAME(ff_cos_53)[1].re * tmp[1].re;
157  tmp[1].im = TX_NAME(ff_cos_53)[1].re * tmp[1].im;
158  out[1*stride].re = in[0].re - tmp[1].re + tmp[0].re;
159  out[1*stride].im = in[0].im - tmp[1].im - tmp[0].im;
160  out[2*stride].re = in[0].re - tmp[1].re - tmp[0].re;
161  out[2*stride].im = in[0].im - tmp[1].im + tmp[0].im;
162 #endif
163 }
164 
165 #define DECL_FFT5(NAME, D0, D1, D2, D3, D4) \
166 static av_always_inline void NAME(FFTComplex *out, FFTComplex *in, \
167  ptrdiff_t stride) \
168 { \
169  FFTComplex z0[4], t[6]; \
170  \
171  BF(t[1].im, t[0].re, in[1].re, in[4].re); \
172  BF(t[1].re, t[0].im, in[1].im, in[4].im); \
173  BF(t[3].im, t[2].re, in[2].re, in[3].re); \
174  BF(t[3].re, t[2].im, in[2].im, in[3].im); \
175  \
176  out[D0*stride].re = in[0].re + t[0].re + t[2].re; \
177  out[D0*stride].im = in[0].im + t[0].im + t[2].im; \
178  \
179  SMUL(t[4].re, t[0].re, TX_NAME(ff_cos_53)[2].re, TX_NAME(ff_cos_53)[3].re, t[2].re, t[0].re); \
180  SMUL(t[4].im, t[0].im, TX_NAME(ff_cos_53)[2].re, TX_NAME(ff_cos_53)[3].re, t[2].im, t[0].im); \
181  CMUL(t[5].re, t[1].re, TX_NAME(ff_cos_53)[2].im, TX_NAME(ff_cos_53)[3].im, t[3].re, t[1].re); \
182  CMUL(t[5].im, t[1].im, TX_NAME(ff_cos_53)[2].im, TX_NAME(ff_cos_53)[3].im, t[3].im, t[1].im); \
183  \
184  BF(z0[0].re, z0[3].re, t[0].re, t[1].re); \
185  BF(z0[0].im, z0[3].im, t[0].im, t[1].im); \
186  BF(z0[2].re, z0[1].re, t[4].re, t[5].re); \
187  BF(z0[2].im, z0[1].im, t[4].im, t[5].im); \
188  \
189  out[D1*stride].re = in[0].re + z0[3].re; \
190  out[D1*stride].im = in[0].im + z0[0].im; \
191  out[D2*stride].re = in[0].re + z0[2].re; \
192  out[D2*stride].im = in[0].im + z0[1].im; \
193  out[D3*stride].re = in[0].re + z0[1].re; \
194  out[D3*stride].im = in[0].im + z0[2].im; \
195  out[D4*stride].re = in[0].re + z0[0].re; \
196  out[D4*stride].im = in[0].im + z0[3].im; \
197 }
198 
199 DECL_FFT5(fft5, 0, 1, 2, 3, 4)
200 DECL_FFT5(fft5_m1, 0, 6, 12, 3, 9)
201 DECL_FFT5(fft5_m2, 10, 1, 7, 13, 4)
202 DECL_FFT5(fft5_m3, 5, 11, 2, 8, 14)
203 
205  ptrdiff_t stride)
206 {
207  FFTComplex tmp[15];
208 
209  for (int i = 0; i < 5; i++)
210  fft3(tmp + i, in + i*3, 5);
211 
212  fft5_m1(out, tmp + 0, stride);
213  fft5_m2(out, tmp + 5, stride);
214  fft5_m3(out, tmp + 10, stride);
215 }
216 
217 #define BUTTERFLIES(a0,a1,a2,a3) {\
218  BF(t3, t5, t5, t1);\
219  BF(a2.re, a0.re, a0.re, t5);\
220  BF(a3.im, a1.im, a1.im, t3);\
221  BF(t4, t6, t2, t6);\
222  BF(a3.re, a1.re, a1.re, t4);\
223  BF(a2.im, a0.im, a0.im, t6);\
224 }
225 
226 // force loading all the inputs before storing any.
227 // this is slightly slower for small data, but avoids store->load aliasing
228 // for addresses separated by large powers of 2.
229 #define BUTTERFLIES_BIG(a0,a1,a2,a3) {\
230  FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\
231  BF(t3, t5, t5, t1);\
232  BF(a2.re, a0.re, r0, t5);\
233  BF(a3.im, a1.im, i1, t3);\
234  BF(t4, t6, t2, t6);\
235  BF(a3.re, a1.re, r1, t4);\
236  BF(a2.im, a0.im, i0, t6);\
237 }
238 
239 #define TRANSFORM(a0,a1,a2,a3,wre,wim) {\
240  CMUL(t1, t2, a2.re, a2.im, wre, -wim);\
241  CMUL(t5, t6, a3.re, a3.im, wre, wim);\
242  BUTTERFLIES(a0,a1,a2,a3)\
243 }
244 
245 #define TRANSFORM_ZERO(a0,a1,a2,a3) {\
246  t1 = a2.re;\
247  t2 = a2.im;\
248  t5 = a3.re;\
249  t6 = a3.im;\
250  BUTTERFLIES(a0,a1,a2,a3)\
251 }
252 
253 /* z[0...8n-1], w[1...2n-1] */
254 #define PASS(name)\
255 static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\
256 {\
257  FFTSample t1, t2, t3, t4, t5, t6;\
258  int o1 = 2*n;\
259  int o2 = 4*n;\
260  int o3 = 6*n;\
261  const FFTSample *wim = wre+o1;\
262  n--;\
263 \
264  TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\
265  TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
266  do {\
267  z += 2;\
268  wre += 2;\
269  wim -= 2;\
270  TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\
271  TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
272  } while(--n);\
273 }
274 
275 PASS(pass)
276 #undef BUTTERFLIES
277 #define BUTTERFLIES BUTTERFLIES_BIG
278 PASS(pass_big)
279 
280 #define DECL_FFT(n,n2,n4)\
281 static void fft##n(FFTComplex *z)\
282 {\
283  fft##n2(z);\
284  fft##n4(z+n4*2);\
285  fft##n4(z+n4*3);\
286  pass(z,TX_NAME(ff_cos_##n),n4/2);\
287 }
288 
289 static void fft2(FFTComplex *z)
290 {
291  FFTComplex tmp;
292  BF(tmp.re, z[0].re, z[0].re, z[1].re);
293  BF(tmp.im, z[0].im, z[0].im, z[1].im);
294  z[1] = tmp;
295 }
296 
297 static void fft4(FFTComplex *z)
298 {
299  FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
300 
301  BF(t3, t1, z[0].re, z[1].re);
302  BF(t8, t6, z[3].re, z[2].re);
303  BF(z[2].re, z[0].re, t1, t6);
304  BF(t4, t2, z[0].im, z[1].im);
305  BF(t7, t5, z[2].im, z[3].im);
306  BF(z[3].im, z[1].im, t4, t8);
307  BF(z[3].re, z[1].re, t3, t7);
308  BF(z[2].im, z[0].im, t2, t5);
309 }
310 
311 static void fft8(FFTComplex *z)
312 {
313  FFTSample t1, t2, t3, t4, t5, t6;
314 
315  fft4(z);
316 
317  BF(t1, z[5].re, z[4].re, -z[5].re);
318  BF(t2, z[5].im, z[4].im, -z[5].im);
319  BF(t5, z[7].re, z[6].re, -z[7].re);
320  BF(t6, z[7].im, z[6].im, -z[7].im);
321 
322  BUTTERFLIES(z[0],z[2],z[4],z[6]);
323  TRANSFORM(z[1],z[3],z[5],z[7],RESCALE(M_SQRT1_2),RESCALE(M_SQRT1_2));
324 }
325 
326 static void fft16(FFTComplex *z)
327 {
328  FFTSample t1, t2, t3, t4, t5, t6;
329  FFTSample cos_16_1 = TX_NAME(ff_cos_16)[1];
330  FFTSample cos_16_3 = TX_NAME(ff_cos_16)[3];
331 
332  fft8(z);
333  fft4(z+8);
334  fft4(z+12);
335 
336  TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
337  TRANSFORM(z[2],z[6],z[10],z[14],RESCALE(M_SQRT1_2),RESCALE(M_SQRT1_2));
338  TRANSFORM(z[1],z[5],z[9],z[13],cos_16_1,cos_16_3);
339  TRANSFORM(z[3],z[7],z[11],z[15],cos_16_3,cos_16_1);
340 }
341 
342 DECL_FFT(32,16,8)
343 DECL_FFT(64,32,16)
344 DECL_FFT(128,64,32)
345 DECL_FFT(256,128,64)
346 DECL_FFT(512,256,128)
347 #define pass pass_big
348 DECL_FFT(1024,512,256)
349 DECL_FFT(2048,1024,512)
350 DECL_FFT(4096,2048,1024)
351 DECL_FFT(8192,4096,2048)
352 DECL_FFT(16384,8192,4096)
353 DECL_FFT(32768,16384,8192)
354 DECL_FFT(65536,32768,16384)
355 DECL_FFT(131072,65536,32768)
356 
357 static void (* const fft_dispatch[])(FFTComplex*) = {
358  NULL, fft2, fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512,
359  fft1024, fft2048, fft4096, fft8192, fft16384, fft32768, fft65536, fft131072
360 };
361 
362 #define DECL_COMP_FFT(N) \
363 static void compound_fft_##N##xM(AVTXContext *s, void *_out, \
364  void *_in, ptrdiff_t stride) \
365 { \
366  const int m = s->m, *in_map = s->pfatab, *out_map = in_map + N*m; \
367  FFTComplex *in = _in; \
368  FFTComplex *out = _out; \
369  FFTComplex fft##N##in[N]; \
370  void (*fftp)(FFTComplex *z) = fft_dispatch[av_log2(m)]; \
371  \
372  for (int i = 0; i < m; i++) { \
373  for (int j = 0; j < N; j++) \
374  fft##N##in[j] = in[in_map[i*N + j]]; \
375  fft##N(s->tmp + s->revtab[i], fft##N##in, m); \
376  } \
377  \
378  for (int i = 0; i < N; i++) \
379  fftp(s->tmp + m*i); \
380  \
381  for (int i = 0; i < N*m; i++) \
382  out[i] = s->tmp[out_map[i]]; \
383 }
384 
385 DECL_COMP_FFT(3)
386 DECL_COMP_FFT(5)
387 DECL_COMP_FFT(15)
388 
389 static void monolithic_fft(AVTXContext *s, void *_out, void *_in,
390  ptrdiff_t stride)
391 {
392  FFTComplex *in = _in;
393  FFTComplex *out = _out;
394  int m = s->m, mb = av_log2(m);
395 
396  if (s->flags & AV_TX_INPLACE) {
397  FFTComplex tmp;
398  int src, dst, *inplace_idx = s->inplace_idx;
399 
400  src = *inplace_idx++;
401 
402  do {
403  tmp = out[src];
404  dst = s->revtab[src];
405  do {
406  FFSWAP(FFTComplex, tmp, out[dst]);
407  dst = s->revtab[dst];
408  } while (dst != src); /* Can be > as well, but is less predictable */
409  out[dst] = tmp;
410  } while ((src = *inplace_idx++));
411  } else {
412  for (int i = 0; i < m; i++)
413  out[i] = in[s->revtab[i]];
414  }
415 
416  fft_dispatch[mb](out);
417 }
418 
419 static void naive_fft(AVTXContext *s, void *_out, void *_in,
420  ptrdiff_t stride)
421 {
422  FFTComplex *in = _in;
423  FFTComplex *out = _out;
424  const int n = s->n;
425  double phase = s->inv ? 2.0*M_PI/n : -2.0*M_PI/n;
426 
427  for(int i = 0; i < n; i++) {
428  FFTComplex tmp = { 0 };
429  for(int j = 0; j < n; j++) {
430  const double factor = phase*i*j;
431  const FFTComplex mult = {
432  RESCALE(cos(factor)),
433  RESCALE(sin(factor)),
434  };
435  FFTComplex res;
436  CMUL3(res, in[j], mult);
437  tmp.re += res.re;
438  tmp.im += res.im;
439  }
440  out[i] = tmp;
441  }
442 }
443 
444 #define DECL_COMP_IMDCT(N) \
445 static void compound_imdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \
446  ptrdiff_t stride) \
447 { \
448  FFTComplex fft##N##in[N]; \
449  FFTComplex *z = _dst, *exp = s->exptab; \
450  const int m = s->m, len8 = N*m >> 1; \
451  const int *in_map = s->pfatab, *out_map = in_map + N*m; \
452  const FFTSample *src = _src, *in1, *in2; \
453  void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m)]; \
454  \
455  stride /= sizeof(*src); /* To convert it from bytes */ \
456  in1 = src; \
457  in2 = src + ((N*m*2) - 1) * stride; \
458  \
459  for (int i = 0; i < m; i++) { \
460  for (int j = 0; j < N; j++) { \
461  const int k = in_map[i*N + j]; \
462  FFTComplex tmp = { in2[-k*stride], in1[k*stride] }; \
463  CMUL3(fft##N##in[j], tmp, exp[k >> 1]); \
464  } \
465  fft##N(s->tmp + s->revtab[i], fft##N##in, m); \
466  } \
467  \
468  for (int i = 0; i < N; i++) \
469  fftp(s->tmp + m*i); \
470  \
471  for (int i = 0; i < len8; i++) { \
472  const int i0 = len8 + i, i1 = len8 - i - 1; \
473  const int s0 = out_map[i0], s1 = out_map[i1]; \
474  FFTComplex src1 = { s->tmp[s1].im, s->tmp[s1].re }; \
475  FFTComplex src0 = { s->tmp[s0].im, s->tmp[s0].re }; \
476  \
477  CMUL(z[i1].re, z[i0].im, src1.re, src1.im, exp[i1].im, exp[i1].re); \
478  CMUL(z[i0].re, z[i1].im, src0.re, src0.im, exp[i0].im, exp[i0].re); \
479  } \
480 }
481 
484 DECL_COMP_IMDCT(15)
485 
486 #define DECL_COMP_MDCT(N) \
487 static void compound_mdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \
488  ptrdiff_t stride) \
489 { \
490  FFTSample *src = _src, *dst = _dst; \
491  FFTComplex *exp = s->exptab, tmp, fft##N##in[N]; \
492  const int m = s->m, len4 = N*m, len3 = len4 * 3, len8 = len4 >> 1; \
493  const int *in_map = s->pfatab, *out_map = in_map + N*m; \
494  void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m)]; \
495  \
496  stride /= sizeof(*dst); \
497  \
498  for (int i = 0; i < m; i++) { /* Folding and pre-reindexing */ \
499  for (int j = 0; j < N; j++) { \
500  const int k = in_map[i*N + j]; \
501  if (k < len4) { \
502  tmp.re = FOLD(-src[ len4 + k], src[1*len4 - 1 - k]); \
503  tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]); \
504  } else { \
505  tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]); \
506  tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]); \
507  } \
508  CMUL(fft##N##in[j].im, fft##N##in[j].re, tmp.re, tmp.im, \
509  exp[k >> 1].re, exp[k >> 1].im); \
510  } \
511  fft##N(s->tmp + s->revtab[i], fft##N##in, m); \
512  } \
513  \
514  for (int i = 0; i < N; i++) \
515  fftp(s->tmp + m*i); \
516  \
517  for (int i = 0; i < len8; i++) { \
518  const int i0 = len8 + i, i1 = len8 - i - 1; \
519  const int s0 = out_map[i0], s1 = out_map[i1]; \
520  FFTComplex src1 = { s->tmp[s1].re, s->tmp[s1].im }; \
521  FFTComplex src0 = { s->tmp[s0].re, s->tmp[s0].im }; \
522  \
523  CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], src0.re, src0.im, \
524  exp[i0].im, exp[i0].re); \
525  CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], src1.re, src1.im, \
526  exp[i1].im, exp[i1].re); \
527  } \
528 }
529 
532 DECL_COMP_MDCT(15)
533 
534 static void monolithic_imdct(AVTXContext *s, void *_dst, void *_src,
535  ptrdiff_t stride)
536 {
537  FFTComplex *z = _dst, *exp = s->exptab;
538  const int m = s->m, len8 = m >> 1;
539  const FFTSample *src = _src, *in1, *in2;
540  void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m)];
541 
542  stride /= sizeof(*src);
543  in1 = src;
544  in2 = src + ((m*2) - 1) * stride;
545 
546  for (int i = 0; i < m; i++) {
547  FFTComplex tmp = { in2[-2*i*stride], in1[2*i*stride] };
548  CMUL3(z[s->revtab[i]], tmp, exp[i]);
549  }
550 
551  fftp(z);
552 
553  for (int i = 0; i < len8; i++) {
554  const int i0 = len8 + i, i1 = len8 - i - 1;
555  FFTComplex src1 = { z[i1].im, z[i1].re };
556  FFTComplex src0 = { z[i0].im, z[i0].re };
557 
558  CMUL(z[i1].re, z[i0].im, src1.re, src1.im, exp[i1].im, exp[i1].re);
559  CMUL(z[i0].re, z[i1].im, src0.re, src0.im, exp[i0].im, exp[i0].re);
560  }
561 }
562 
563 static void monolithic_mdct(AVTXContext *s, void *_dst, void *_src,
564  ptrdiff_t stride)
565 {
566  FFTSample *src = _src, *dst = _dst;
567  FFTComplex *exp = s->exptab, tmp, *z = _dst;
568  const int m = s->m, len4 = m, len3 = len4 * 3, len8 = len4 >> 1;
569  void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m)];
570 
571  stride /= sizeof(*dst);
572 
573  for (int i = 0; i < m; i++) { /* Folding and pre-reindexing */
574  const int k = 2*i;
575  if (k < len4) {
576  tmp.re = FOLD(-src[ len4 + k], src[1*len4 - 1 - k]);
577  tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]);
578  } else {
579  tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]);
580  tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]);
581  }
582  CMUL(z[s->revtab[i]].im, z[s->revtab[i]].re, tmp.re, tmp.im,
583  exp[i].re, exp[i].im);
584  }
585 
586  fftp(z);
587 
588  for (int i = 0; i < len8; i++) {
589  const int i0 = len8 + i, i1 = len8 - i - 1;
590  FFTComplex src1 = { z[i1].re, z[i1].im };
591  FFTComplex src0 = { z[i0].re, z[i0].im };
592 
593  CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], src0.re, src0.im,
594  exp[i0].im, exp[i0].re);
595  CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], src1.re, src1.im,
596  exp[i1].im, exp[i1].re);
597  }
598 }
599 
600 static void naive_imdct(AVTXContext *s, void *_dst, void *_src,
601  ptrdiff_t stride)
602 {
603  int len = s->n;
604  int len2 = len*2;
605  FFTSample *src = _src;
606  FFTSample *dst = _dst;
607  double scale = s->scale;
608  const double phase = M_PI/(4.0*len2);
609 
610  stride /= sizeof(*src);
611 
612  for (int i = 0; i < len; i++) {
613  double sum_d = 0.0;
614  double sum_u = 0.0;
615  double i_d = phase * (4*len - 2*i - 1);
616  double i_u = phase * (3*len2 + 2*i + 1);
617  for (int j = 0; j < len2; j++) {
618  double a = (2 * j + 1);
619  double a_d = cos(a * i_d);
620  double a_u = cos(a * i_u);
621  double val = UNSCALE(src[j*stride]);
622  sum_d += a_d * val;
623  sum_u += a_u * val;
624  }
625  dst[i + 0] = RESCALE( sum_d*scale);
626  dst[i + len] = RESCALE(-sum_u*scale);
627  }
628 }
629 
630 static void naive_mdct(AVTXContext *s, void *_dst, void *_src,
631  ptrdiff_t stride)
632 {
633  int len = s->n*2;
634  FFTSample *src = _src;
635  FFTSample *dst = _dst;
636  double scale = s->scale;
637  const double phase = M_PI/(4.0*len);
638 
639  stride /= sizeof(*dst);
640 
641  for (int i = 0; i < len; i++) {
642  double sum = 0.0;
643  for (int j = 0; j < len*2; j++) {
644  int a = (2*j + 1 + len) * (2*i + 1);
645  sum += UNSCALE(src[j]) * cos(a * phase);
646  }
647  dst[i*stride] = RESCALE(sum*scale);
648  }
649 }
650 
651 static int gen_mdct_exptab(AVTXContext *s, int len4, double scale)
652 {
653  const double theta = (scale < 0 ? len4 : 0) + 1.0/8.0;
654 
655  if (!(s->exptab = av_malloc_array(len4, sizeof(*s->exptab))))
656  return AVERROR(ENOMEM);
657 
658  scale = sqrt(fabs(scale));
659  for (int i = 0; i < len4; i++) {
660  const double alpha = M_PI_2 * (i + theta) / len4;
661  s->exptab[i].re = RESCALE(cos(alpha) * scale);
662  s->exptab[i].im = RESCALE(sin(alpha) * scale);
663  }
664 
665  return 0;
666 }
667 
669  enum AVTXType type, int inv, int len,
670  const void *scale, uint64_t flags)
671 {
672  const int is_mdct = ff_tx_type_is_mdct(type);
673  int err, l, n = 1, m = 1, max_ptwo = 1 << (FF_ARRAY_ELEMS(fft_dispatch) - 1);
674 
675  if (is_mdct)
676  len >>= 1;
677 
678  l = len;
679 
680 #define CHECK_FACTOR(DST, FACTOR, SRC) \
681  if (DST == 1 && !(SRC % FACTOR)) { \
682  DST = FACTOR; \
683  SRC /= FACTOR; \
684  }
685  CHECK_FACTOR(n, 15, len)
686  CHECK_FACTOR(n, 5, len)
687  CHECK_FACTOR(n, 3, len)
688 #undef CHECK_FACTOR
689 
690  /* len must be a power of two now */
691  if (!(len & (len - 1)) && len >= 2 && len <= max_ptwo) {
692  m = len;
693  len = 1;
694  }
695 
696  s->n = n;
697  s->m = m;
698  s->inv = inv;
699  s->type = type;
700  s->flags = flags;
701 
702  /* If we weren't able to split the length into factors we can handle,
703  * resort to using the naive and slow FT. This also filters out
704  * direct 3, 5 and 15 transforms as they're too niche. */
705  if (len > 1 || m == 1) {
706  if (is_mdct && (l & 1)) /* Odd (i)MDCTs are not supported yet */
707  return AVERROR(ENOSYS);
708  if (flags & AV_TX_INPLACE) /* Neither are in-place naive transforms */
709  return AVERROR(ENOSYS);
710  s->n = l;
711  s->m = 1;
712  *tx = naive_fft;
713  if (is_mdct) {
714  s->scale = *((SCALE_TYPE *)scale);
715  *tx = inv ? naive_imdct : naive_mdct;
716  }
717  return 0;
718  }
719 
720  if (n > 1 && m > 1) { /* 2D transform case */
721  if ((err = ff_tx_gen_compound_mapping(s)))
722  return err;
723  if (!(s->tmp = av_malloc(n*m*sizeof(*s->tmp))))
724  return AVERROR(ENOMEM);
725  *tx = n == 3 ? compound_fft_3xM :
726  n == 5 ? compound_fft_5xM :
727  compound_fft_15xM;
728  if (is_mdct)
729  *tx = n == 3 ? inv ? compound_imdct_3xM : compound_mdct_3xM :
730  n == 5 ? inv ? compound_imdct_5xM : compound_mdct_5xM :
731  inv ? compound_imdct_15xM : compound_mdct_15xM;
732  } else { /* Direct transform case */
733  *tx = monolithic_fft;
734  if (is_mdct)
735  *tx = inv ? monolithic_imdct : monolithic_mdct;
736  }
737 
738  if (n != 1)
739  init_cos_tabs(0);
740  if (m != 1) {
741  if ((err = ff_tx_gen_ptwo_revtab(s, n == 1 && !is_mdct && !(flags & AV_TX_INPLACE))))
742  return err;
743  if (flags & AV_TX_INPLACE) {
744  if (is_mdct) /* In-place MDCTs are not supported yet */
745  return AVERROR(ENOSYS);
747  return err;
748  }
749  for (int i = 4; i <= av_log2(m); i++)
750  init_cos_tabs(i);
751  }
752 
753  if (is_mdct)
754  return gen_mdct_exptab(s, n*m, *((SCALE_TYPE *)scale));
755 
756  return 0;
757 }
func
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:67
stride
int stride
Definition: mace.c:144
monolithic_imdct
static void monolithic_imdct(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx_template.c:534
ff_tx_gen_ptwo_inplace_revtab_idx
int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s)
Definition: tx.c:113
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
fft16
static void fft16(FFTComplex *z)
Definition: tx_template.c:326
out
FILE * out
Definition: movenc.c:54
FFSWAP
#define FFSWAP(type, a, b)
Definition: common.h:108
TRANSFORM
#define TRANSFORM(a0, a1, a2, a3, wre, wim)
Definition: tx_template.c:239
AVTXContext
Definition: tx_priv.h:108
fft2
static void fft2(FFTComplex *z)
Definition: tx_template.c:289
TRANSFORM_ZERO
#define TRANSFORM_ZERO(a0, a1, a2, a3)
Definition: tx_template.c:245
im
float im
Definition: fft.c:82
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:27
M_PI_2
#define M_PI_2
Definition: mathematics.h:55
CMUL3
#define CMUL3(c, a, b)
Definition: mdct15.c:41
t1
#define t1
Definition: regdef.h:29
fft5
static void fft5(FFTComplex *out, FFTComplex *in, FFTComplex exptab[2])
Definition: mdct15.c:92
av_malloc
#define av_malloc(s)
Definition: tableprint_vlc.h:31
fft4
static void fft4(FFTComplex *z)
Definition: tx_template.c:297
DECL_FFT5
#define DECL_FFT5(NAME, D0, D1, D2, D3, D4)
Definition: tx_template.c:165
cos_tabs_init_once
static CosTabsInitOnce cos_tabs_init_once[]
Definition: tx_template.c:103
PASS
#define PASS(name)
Definition: tx_template.c:254
tab
static const struct twinvq_data tab
Definition: twinvq_data.h:10345
sum_d
static void sum_d(const int *input, int *output, int len)
Definition: dcadct.c:51
val
static double val(void *priv, double ch)
Definition: aeval.c:76
type
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf type
Definition: writing_filters.txt:86
mult
static int16_t mult(Float11 *f1, Float11 *f2)
Definition: g726.c:55
ff_thread_once
static int ff_thread_once(char *control, void(*routine)(void))
Definition: thread.h:175
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
av_cold
#define av_cold
Definition: attributes.h:90
av_tx_fn
void(* av_tx_fn)(AVTXContext *s, void *out, void *in, ptrdiff_t stride)
Function pointer to a function to perform the transform.
Definition: tx.h:99
fft8
static void fft8(FFTComplex *z)
Definition: tx_template.c:311
s
#define s(width, name)
Definition: cbs_vp9.c:257
ff_tx_gen_compound_mapping
int ff_tx_gen_compound_mapping(AVTXContext *s)
Definition: tx.c:44
t7
#define t7
Definition: regdef.h:35
gen_mdct_exptab
static int gen_mdct_exptab(AVTXContext *s, int len4, double scale)
Definition: tx_template.c:651
ff_init_53_tabs
static av_cold void ff_init_53_tabs(void)
Definition: tx_template.c:95
AV_ONCE_INIT
#define AV_ONCE_INIT
Definition: thread.h:173
fabs
static __device__ float fabs(float a)
Definition: cuda_runtime.h:182
monolithic_mdct
static void monolithic_mdct(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx_template.c:563
NULL
#define NULL
Definition: coverity.c:32
t5
#define t5
Definition: regdef.h:33
t6
#define t6
Definition: regdef.h:34
AV_TX_INPLACE
@ AV_TX_INPLACE
Performs an in-place transformation on the input.
Definition: tx.h:110
src
#define src
Definition: vp8dsp.c:255
FFTSample
float FFTSample
Definition: avfft.h:35
exp
int8_t exp
Definition: eval.c:72
DECL_COMP_MDCT
#define DECL_COMP_MDCT(N)
Definition: tx_template.c:486
index
int index
Definition: gxfenc.c:89
FFTComplex
void FFTComplex
Definition: tx_priv.h:46
AVTXType
AVTXType
Definition: tx.h:39
CosTabsInitOnce
Definition: fft_template.c:69
CHECK_FACTOR
#define CHECK_FACTOR(DST, FACTOR, SRC)
FFTComplex::im
FFTSample im
Definition: avfft.h:38
FFTComplex::re
FFTSample re
Definition: avfft.h:38
t8
#define t8
Definition: regdef.h:53
BF
#define BF(a, b, c, s)
Definition: dct32_template.c:90
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
ff_tx_type_is_mdct
int ff_tx_type_is_mdct(enum AVTXType type)
Definition: tx.c:21
ff_tx_init_mdct_fft
int TX_NAME() ff_tx_init_mdct_fft(AVTXContext *s, av_tx_fn *tx, enum AVTXType type, int inv, int len, const void *scale, uint64_t flags)
Definition: tx_template.c:668
mb
#define mb
Definition: vf_colormatrix.c:101
M_PI
#define M_PI
Definition: mathematics.h:52
src0
#define src0
Definition: h264pred.c:139
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem.h:117
DECL_FFT
#define DECL_FFT(n, n2, n4)
Definition: tx_template.c:280
src1
#define src1
Definition: h264pred.c:140
in
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! *ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
Definition: audio_convert.c:326
i
int i
Definition: input.c:407
t4
#define t4
Definition: regdef.h:32
t3
#define t3
Definition: regdef.h:31
av_malloc_array
#define av_malloc_array(a, b)
Definition: tableprint_vlc.h:32
ff_tx_gen_ptwo_revtab
int ff_tx_gen_ptwo_revtab(AVTXContext *s, int invert_lookup)
Definition: tx.c:94
av_always_inline
#define av_always_inline
Definition: attributes.h:49
DECL_COMP_IMDCT
#define DECL_COMP_IMDCT(N)
Definition: tx_template.c:444
COSTABLE
COSTABLE(16)
len
int len
Definition: vorbis_enc_data.h:452
pass
#define pass
Definition: tx_template.c:347
init_cos_tabs
static av_cold void init_cos_tabs(int index)
Definition: tx_template.c:124
fft15
static av_always_inline void fft15(FFTComplex *out, FFTComplex *in, ptrdiff_t stride)
Definition: tx_template.c:204
fft_dispatch
static void(*const fft_dispatch[])(FFTComplex *)
Definition: tx_template.c:357
M_SQRT1_2
#define M_SQRT1_2
Definition: mathematics.h:58
INIT_FF_COS_TABS_FUNC
#define INIT_FF_COS_TABS_FUNC(index, size)
Definition: tx_template.c:74
t2
#define t2
Definition: regdef.h:30
cos_tabs
static FFTSample *const cos_tabs[18]
Definition: tx_template.c:42
BUTTERFLIES
#define BUTTERFLIES(a0, a1, a2, a3)
Definition: tx_template.c:277
factor
static const int factor[16]
Definition: vf_pp7.c:77
TX_NAME
FFTComplex TX_NAME(ff_cos_53)[4]
init_cos_tabs_idx
static av_always_inline void init_cos_tabs_idx(int index)
Definition: tx_template.c:63
monolithic_fft
static void monolithic_fft(AVTXContext *s, void *_out, void *_in, ptrdiff_t stride)
Definition: tx_template.c:389
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
naive_fft
static void naive_fft(AVTXContext *s, void *_out, void *_in, ptrdiff_t stride)
Definition: tx_template.c:419
naive_mdct
static void naive_mdct(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx_template.c:630
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:561
DECL_COMP_FFT
#define DECL_COMP_FFT(N)
Definition: tx_template.c:362
naive_imdct
static void naive_imdct(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx_template.c:600
av_log2
int av_log2(unsigned v)
Definition: intmath.c:26
fft3
static av_always_inline void fft3(FFTComplex *out, FFTComplex *in, ptrdiff_t stride)
Definition: tx_template.c:130
FFTComplex
Definition: avfft.h:37
re
float re
Definition: fft.c:82